You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/06/07 20:14:27 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@81702192b49ddb37ce3e179eec3e88f3726acec1)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new bafcec890 deploying docs (apache/tvm@81702192b49ddb37ce3e179eec3e88f3726acec1)
bafcec890 is described below

commit bafcec890ec190515670a6c968c0ce641850da58
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Tue Jun 7 20:14:20 2022 +0000

    deploying docs (apache/tvm@81702192b49ddb37ce3e179eec3e88f3726acec1)
---
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    2 +-
 .../how_to/compile_models/from_paddle.rst.txt      |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   18 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |   10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   16 +-
 .../tune_conv2d_layer_cuda.rst.txt                 |  984 ++++++-
 .../tune_network_cuda.rst.txt                      |    2 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |  122 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |   10 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   34 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   12 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |   12 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    8 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   18 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    6 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |    7 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   54 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   26 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   47 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       |   77 +-
 docs/how_to/compile_models/from_paddle.html        |    2 +-
 docs/how_to/compile_models/from_pytorch.html       |    6 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   17 +-
 docs/how_to/deploy_models/deploy_prequantized.html |    6 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   38 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   18 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |   10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    |  983 ++++++-
 .../tune_with_autoscheduler/tune_network_cuda.html |    2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |  122 +-
 .../tune_with_autotvm/sg_execution_times.html      |   10 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   34 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   12 +-
 .../work_with_microtvm/sg_execution_times.html     |   12 +-
 .../how_to/work_with_relay/sg_execution_times.html |    8 +-
 .../work_with_schedules/sg_execution_times.html    |   18 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 .../api/doxygen/apply__history__best_8h.html       |   11 +-
 .../api/doxygen/apply__history__best_8h__incl.svg  | 2650 ++++++++++---------
 .../doxygen/apply__history__best_8h_source.html    |   23 +-
 docs/reference/api/doxygen/arg__info_8h.html       |    7 +-
 .../api/doxygen/arg__info_8h__dep__incl.svg        |  314 ++-
 docs/reference/api/doxygen/arg__info_8h__incl.svg  | 1307 +++++-----
 .../reference/api/doxygen/arg__info_8h_source.html |   21 +-
 docs/reference/api/doxygen/array_8h.html           |    2 +-
 docs/reference/api/doxygen/array_8h__dep__incl.svg | 2023 ++++++++-------
 .../doxygen/attr__registry__map_8h__dep__incl.svg  |   48 +-
 docs/reference/api/doxygen/block__scope_8h.html    |    2 +-
 .../api/doxygen/block__scope_8h__dep__incl.svg     |  326 +--
 docs/reference/api/doxygen/builder_8h.html         |   10 +-
 docs/reference/api/doxygen/builder_8h__incl.svg    | 1870 +++++++-------
 docs/reference/api/doxygen/builder_8h_source.html  |   46 +-
 docs/reference/api/doxygen/c__runtime__api_8h.html |    2 +-
 .../api/doxygen/c__runtime__api_8h__dep__incl.svg  | 1700 +++++++------
 docs/reference/api/doxygen/classes.html            |   20 +-
 ...__schedule_1_1ApplyHistoryBestNode-members.html |    2 +-
 ..._1_1meta__schedule_1_1ApplyHistoryBestNode.html |   10 +-
 ...eta__schedule_1_1ExtractedTaskNode-members.html |    2 +-
 ...tvm_1_1meta__schedule_1_1ExtractedTaskNode.html |   10 +-
 ..._1_1meta__schedule_1_1FeatureExtractorNode.html |    2 +-
 ...m_1_1meta__schedule_1_1MeasureCallbackNode.html |    2 +-
 ...stvm_1_1meta__schedule_1_1MeasureCandidate.html |    4 +-
 ..._1_1meta__schedule_1_1MeasureCandidateNode.html |    4 +-
 ...1meta__schedule_1_1PyCostModelNode-members.html |    8 +-
 ...sstvm_1_1meta__schedule_1_1PyCostModelNode.html |    8 +-
 ...schedule_1_1PyFeatureExtractorNode-members.html |    2 +-
 ..._1meta__schedule_1_1PyFeatureExtractorNode.html |   12 +-
 ..._schedule_1_1PyMeasureCallbackNode-members.html |    2 +-
 ...1_1meta__schedule_1_1PyMeasureCallbackNode.html |   12 +-
 ...1_1meta__schedule_1_1PyMutatorNode-members.html |    4 +-
 ...lasstvm_1_1meta__schedule_1_1PyMutatorNode.html |    4 +-
 ..._1meta__schedule_1_1PyPostprocNode-members.html |    4 +-
 ...asstvm_1_1meta__schedule_1_1PyPostprocNode.html |    4 +-
 ...ta__schedule_1_1PyScheduleRuleNode-members.html |    4 +-
 ...vm_1_1meta__schedule_1_1PyScheduleRuleNode.html |    4 +-
 ...__schedule_1_1PySearchStrategyNode-members.html |    8 +-
 ..._1_1meta__schedule_1_1PySearchStrategyNode.html |   18 +-
 ...__schedule_1_1PySpaceGeneratorNode-members.html |    4 +-
 ..._1_1meta__schedule_1_1PySpaceGeneratorNode.html |    4 +-
 ...a__schedule_1_1PyTaskSchedulerNode-members.html |   10 +-
 ...m_1_1meta__schedule_1_1PyTaskSchedulerNode.html |   10 +-
 ...vm_1_1meta__schedule_1_1SearchStrategyNode.html |    2 +-
 docs/reference/api/doxygen/data__type_8h.html      |    2 +-
 .../api/doxygen/data__type_8h__dep__incl.svg       | 2048 ++++++++-------
 docs/reference/api/doxygen/database_8h.html        |   13 +-
 .../api/doxygen/database_8h__dep__incl.svg         |  102 +-
 docs/reference/api/doxygen/database_8h__incl.svg   | 2602 +++++++++----------
 docs/reference/api/doxygen/database_8h_source.html |   73 +-
 .../api/doxygen/diagnostic_8h__dep__incl.svg       |  392 +--
 docs/reference/api/doxygen/dir_000004_000007.html  |    2 +-
 docs/reference/api/doxygen/dir_000004_000008.html  |    2 +-
 docs/reference/api/doxygen/dir_000004_000011.html  |    2 +-
 docs/reference/api/doxygen/dir_000004_000013.html  |    2 +-
 docs/reference/api/doxygen/dir_000004_000017.html  |    2 +-
 .../dir_4378f18824ae7d4ad48f8d7785cd7ac8.html      |    2 +
 .../dir_4378f18824ae7d4ad48f8d7785cd7ac8_dep.svg   |   20 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |   20 +-
 docs/reference/api/doxygen/error_8h__dep__incl.svg |  388 +--
 docs/reference/api/doxygen/extracted__task_8h.html |    9 +-
 .../api/doxygen/extracted__task_8h__incl.svg       | 2540 +++++++++----------
 .../api/doxygen/extracted__task_8h_source.html     |   23 +-
 .../api/doxygen/feature__extractor_8h.html         |   10 +-
 .../api/doxygen/feature__extractor_8h__incl.svg    | 2026 +++++++--------
 .../api/doxygen/feature__extractor_8h_source.html  |   29 +-
 docs/reference/api/doxygen/files.html              |   17 +-
 docs/reference/api/doxygen/functions_a.html        |    2 +-
 docs/reference/api/doxygen/functions_e.html        |    2 +-
 docs/reference/api/doxygen/functions_func_a.html   |    2 +-
 docs/reference/api/doxygen/functions_func_e.html   |    2 +-
 docs/reference/api/doxygen/functions_func_n.html   |    2 +-
 docs/reference/api/doxygen/functions_func_t.html   |   10 +-
 docs/reference/api/doxygen/functions_func_v.html   |   14 +-
 docs/reference/api/doxygen/functions_n.html        |    2 +-
 docs/reference/api/doxygen/functions_s.html        |    6 +-
 docs/reference/api/doxygen/functions_t.html        |    4 +-
 docs/reference/api/doxygen/functions_v.html        |    8 +-
 docs/reference/api/doxygen/functor_8h.html         |    2 +-
 .../api/doxygen/functor_8h__dep__incl.svg          | 1312 +++++-----
 docs/reference/api/doxygen/hierarchy.html          |    4 +-
 docs/reference/api/doxygen/index__map_8h.html      |    2 +-
 .../api/doxygen/index__map_8h__dep__incl.svg       | 1028 ++++----
 docs/reference/api/doxygen/instruction_8h.html     |    2 +-
 .../api/doxygen/instruction_8h__dep__incl.svg      |  394 +--
 .../api/doxygen/instrument_8h__dep__incl.svg       |  380 +--
 docs/reference/api/doxygen/ir_2adt_8h.html         |    2 +-
 .../api/doxygen/ir_2adt_8h__dep__incl.svg          | 1356 +++++-----
 docs/reference/api/doxygen/ir_2attrs_8h.html       |    2 +-
 .../api/doxygen/ir_2attrs_8h__dep__incl.svg        | 1376 +++++-----
 docs/reference/api/doxygen/ir_2expr_8h.html        |    2 +-
 .../api/doxygen/ir_2expr_8h__dep__incl.svg         | 1251 ++++-----
 docs/reference/api/doxygen/ir_2function_8h.html    |    2 +-
 .../api/doxygen/ir_2function_8h__dep__incl.svg     | 1377 +++++-----
 docs/reference/api/doxygen/ir_2module_8h.html      |    2 +-
 .../api/doxygen/ir_2module_8h__dep__incl.svg       | 1348 +++++-----
 docs/reference/api/doxygen/ir_2span_8h.html        |    2 +-
 .../api/doxygen/ir_2span_8h__dep__incl.svg         | 1719 ++++++-------
 .../api/doxygen/ir_2transform_8h__dep__incl.svg    |  408 +--
 docs/reference/api/doxygen/ir_2type_8h.html        |    2 +-
 .../api/doxygen/ir_2type_8h__dep__incl.svg         | 1600 ++++++------
 docs/reference/api/doxygen/map_8h.html             |    2 +-
 docs/reference/api/doxygen/map_8h__dep__incl.svg   | 1819 +++++++------
 .../api/doxygen/measure__callback_8h.html          |    8 +-
 .../doxygen/measure__callback_8h__dep__incl.svg    |   12 +-
 .../api/doxygen/measure__callback_8h__incl.svg     | 2553 +++++++++++--------
 .../api/doxygen/measure__callback_8h_source.html   |   25 +-
 ...postproc_8h.html => measure__candidate_8h.html} |   29 +-
 .../doxygen/measure__candidate_8h__dep__incl.svg   |  146 ++
 .../api/doxygen/measure__candidate_8h__incl.svg    | 1480 +++++++++++
 .../api/doxygen/measure__candidate_8h_source.html  |   96 +
 .../doxygen/meta__schedule_2cost__model_8h.html    |   14 +-
 .../meta__schedule_2cost__model_8h__dep__incl.svg  |   92 +-
 .../meta__schedule_2cost__model_8h__incl.svg       | 2528 +++++++++---------
 .../meta__schedule_2cost__model_8h_source.html     |   39 +-
 docs/reference/api/doxygen/mutator_8h.html         |   10 +-
 docs/reference/api/doxygen/mutator_8h__incl.svg    | 2292 ++++++++---------
 docs/reference/api/doxygen/mutator_8h_source.html  |   28 +-
 docs/reference/api/doxygen/ndarray_8h.html         |    2 +-
 .../api/doxygen/ndarray_8h__dep__incl.svg          | 1715 +++++++------
 docs/reference/api/doxygen/node_8h.html            |    2 +-
 docs/reference/api/doxygen/node_8h__dep__incl.svg  | 1580 ++++++------
 docs/reference/api/doxygen/object_8h.html          |    2 +-
 .../reference/api/doxygen/object_8h__dep__incl.svg | 2275 +++++++++--------
 docs/reference/api/doxygen/optional_8h.html        |    2 +-
 .../api/doxygen/optional_8h__dep__incl.svg         | 1950 +++++++-------
 docs/reference/api/doxygen/packed__func_8h.html    |    2 +-
 .../api/doxygen/packed__func_8h__dep__incl.svg     | 1496 +++++------
 docs/reference/api/doxygen/postproc_8h.html        |    7 +-
 docs/reference/api/doxygen/postproc_8h__incl.svg   | 2279 ++++++++---------
 docs/reference/api/doxygen/postproc_8h_source.html |   25 +-
 docs/reference/api/doxygen/random__engine_8h.html  |    2 +-
 .../api/doxygen/random__engine_8h__dep__incl.svg   |  348 +--
 docs/reference/api/doxygen/reflection_8h.html      |    2 +-
 .../api/doxygen/reflection_8h__dep__incl.svg       | 1802 ++++++-------
 .../api/doxygen/registry_8h__dep__incl.svg         |  160 +-
 docs/reference/api/doxygen/repr__printer_8h.html   |    2 +-
 .../api/doxygen/repr__printer_8h__dep__incl.svg    | 1556 ++++++------
 docs/reference/api/doxygen/runner_8h.html          |   10 +-
 .../reference/api/doxygen/runner_8h__dep__incl.svg |  172 +-
 docs/reference/api/doxygen/runner_8h__incl.svg     | 1414 ++++++-----
 docs/reference/api/doxygen/runner_8h_source.html   |   62 +-
 .../api/doxygen/runtime_2container_2adt_8h.html    |    2 +-
 .../runtime_2container_2adt_8h__dep__incl.svg      | 1278 +++++-----
 .../api/doxygen/runtime_2container_2base_8h.html   |    2 +-
 .../runtime_2container_2base_8h__dep__incl.svg     | 2153 +++++++++-------
 docs/reference/api/doxygen/runtime_2memory_8h.html |    2 +-
 .../api/doxygen/runtime_2memory_8h__dep__incl.svg  | 2145 +++++++++-------
 docs/reference/api/doxygen/runtime_2module_8h.html |    2 +-
 .../api/doxygen/runtime_2module_8h__dep__incl.svg  | 1483 +++++------
 docs/reference/api/doxygen/schedule__rule_8h.html  |   12 +-
 .../api/doxygen/schedule__rule_8h__incl.svg        | 2349 +++++++++--------
 .../api/doxygen/schedule__rule_8h_source.html      |   28 +-
 docs/reference/api/doxygen/search/all_1.js         |    2 +-
 docs/reference/api/doxygen/search/all_14.js        |    6 +-
 docs/reference/api/doxygen/search/all_15.js        |   12 +-
 docs/reference/api/doxygen/search/all_16.js        |    4 +-
 docs/reference/api/doxygen/search/all_17.js        |    6 +-
 docs/reference/api/doxygen/search/all_18.js        |    2 +-
 docs/reference/api/doxygen/search/all_2.js         |    4 +-
 docs/reference/api/doxygen/search/all_6.js         |    2 +-
 docs/reference/api/doxygen/search/all_e.js         |    1 +
 docs/reference/api/doxygen/search/all_f.js         |    2 +-
 docs/reference/api/doxygen/search/classes_10.js    |    6 +-
 docs/reference/api/doxygen/search/classes_11.js    |    4 +-
 docs/reference/api/doxygen/search/files_9.js       |    1 +
 docs/reference/api/doxygen/search/functions_1.js   |    2 +-
 docs/reference/api/doxygen/search/functions_14.js  |    6 +-
 docs/reference/api/doxygen/search/functions_15.js  |    4 +-
 docs/reference/api/doxygen/search/functions_16.js  |    4 +-
 docs/reference/api/doxygen/search/functions_5.js   |    2 +-
 docs/reference/api/doxygen/search/functions_e.js   |    2 +-
 docs/reference/api/doxygen/search/variables_0.js   |    2 +-
 docs/reference/api/doxygen/search/variables_1.js   |    2 +-
 .../reference/api/doxygen/search__strategy_8h.html |   18 +-
 .../api/doxygen/search__strategy_8h__dep__incl.svg |  128 +-
 .../api/doxygen/search__strategy_8h__incl.svg      | 2334 +++++++++--------
 .../api/doxygen/search__strategy_8h_source.html    |   51 +-
 docs/reference/api/doxygen/serializer_8h.html      |    2 +-
 .../api/doxygen/serializer_8h__dep__incl.svg       | 1705 +++++++------
 docs/reference/api/doxygen/shape__tuple_8h.html    |    2 +-
 .../api/doxygen/shape__tuple_8h__dep__incl.svg     | 1696 +++++++------
 docs/reference/api/doxygen/source__map_8h.html     |    2 +-
 .../api/doxygen/source__map_8h__dep__incl.svg      | 1360 +++++-----
 .../reference/api/doxygen/space__generator_8h.html |    6 +-
 .../api/doxygen/space__generator_8h__incl.svg      | 1836 ++++++++------
 .../api/doxygen/space__generator_8h_source.html    |   24 +-
 docs/reference/api/doxygen/state_8h.html           |    2 +-
 docs/reference/api/doxygen/state_8h__dep__incl.svg |  316 ++-
 docs/reference/api/doxygen/stmt_8h.html            |    2 +-
 docs/reference/api/doxygen/stmt_8h__dep__incl.svg  | 1060 ++++----
 docs/reference/api/doxygen/string_8h.html          |    2 +-
 .../reference/api/doxygen/string_8h__dep__incl.svg | 1766 ++++++-------
 .../api/doxygen/structural__equal_8h.html          |    2 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    | 1635 ++++++------
 .../reference/api/doxygen/structural__hash_8h.html |    2 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg | 1635 ++++++------
 docs/reference/api/doxygen/target_8h.html          |    2 +-
 .../reference/api/doxygen/target_8h__dep__incl.svg | 1144 ++++-----
 docs/reference/api/doxygen/target__kind_8h.html    |    2 +-
 .../api/doxygen/target__kind_8h__dep__incl.svg     | 1110 ++++----
 docs/reference/api/doxygen/task__scheduler_8h.html |    8 +-
 .../api/doxygen/task__scheduler_8h__incl.svg       | 2583 +++++++++++--------
 .../api/doxygen/task__scheduler_8h_source.html     |   55 +-
 .../api/doxygen/tir_2expr_8h__dep__incl.svg        |   72 +-
 docs/reference/api/doxygen/tir_2function_8h.html   |    2 +-
 .../api/doxygen/tir_2function_8h__dep__incl.svg    | 1300 +++++-----
 .../api/doxygen/tir_2schedule_2schedule_8h.html    |    2 +-
 .../tir_2schedule_2schedule_8h__dep__incl.svg      |  306 ++-
 docs/reference/api/doxygen/trace_8h.html           |    2 +-
 docs/reference/api/doxygen/trace_8h__dep__incl.svg |  384 +--
 docs/reference/api/doxygen/tune__context_8h.html   |   12 +-
 .../api/doxygen/tune__context_8h__dep__incl.svg    |   24 +-
 .../api/doxygen/tune__context_8h__incl.svg         | 2676 ++++++++++++--------
 .../api/doxygen/tune__context_8h_source.html       |   46 +-
 docs/reference/api/doxygen/var_8h__dep__incl.svg   |   92 +-
 docs/reference/api/doxygen/with_8h__dep__incl.svg  |  192 +-
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    6 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    3 +-
 docs/tutorial/autotvm_relay_x86.html               |  258 +-
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   26 +-
 docs/tutorial/tensor_expr_get_started.html         |   43 +-
 335 files changed, 54738 insertions(+), 46780 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index 778c5ef9c..8a7a0dcb9 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -98,7 +98,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip739d518d-7aa3-4b94-975e-12f5615fe057 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip64a722c4-ea57-41fc-8352-ca338bb20723 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index 6c3540449..26f0f5024 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -100,7 +100,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<07:35, 95.5kB/s]
      0%|          | 40.0k/41.5M [00:00<05:53, 123kB/s] 
      0%|          | 96.0k/41.5M [00:00<03:18, 219kB/s]
      0%|          | 160k/41.5M [00:00<02:33, 282kB/s] 
      1%|          | 328k/41.5M [00:00<01:20, 538kB/s]
      1%|1         | 568k/41.5M [00:01<00:51, 839kB/s]
      3%|2         | 1.10M/41.5M [00:01<00:25, 1.65MB/s]
      5%|5         | 2.19M/41.5M [00:01<00:12, 3.22MB/s]
      9%|8         | 3.66M/41.5M [00:01<00:07, 5.01MB/s]
     12%|#2        | 5.13M/41.5M [00:01<00:06, 6.23MB/s]
     16%|#5        | 6.60M/41.5M [00:01<00:05, 7.03MB/s]
     18%|#7        | 7.27M/41.5M [00:02<00:05, 6.08MB/s]
     20%|##        | 8.45M/41.5M [00:02<00:05, 6.42MB/s]
     23%|##3       | 9.55M/41.5M [00:02<00:05, 6.29MB/s]
     26%|##5       | 10.7M/41.5M [00:02<00:04, 6.63MB/s]
     29%|##9       | 12.1M/41.5M [00:02<00:04, 7.32MB/s]
     33%|###2      | 13.6M/41.5M [00:02<00
 :03, 7.81MB/s]
     36%|###6      | 15.1M/41.5M [00:03<00:03, 8.14MB/s]
     40%|###9      | 16.5M/41.5M [00:03<00:03, 8.38MB/s]
     43%|####3     | 18.0M/41.5M [00:03<00:02, 9.69MB/s]
     46%|####5     | 19.0M/41.5M [00:03<00:02, 9.89MB/s]
     48%|####8     | 20.0M/41.5M [00:03<00:02, 8.81MB/s]
     50%|#####     | 20.9M/41.5M [00:03<00:02, 8.97MB/s]
     54%|#####4    | 22.4M/41.5M [00:03<00:01, 10.2MB/s]
     56%|#####6    | 23.4M/41.5M [00:03<00:02, 9.46MB/s]
     59%|#####8    | 24.4M/41.5M [00:04<00:02, 8.16MB/s]
     61%|######1   | 25.4M/41.5M [00:04<00:02, 7.72MB/s]
     65%|######4   | 26.8M/41.5M [00:04<00:01, 8.12MB/s]
     67%|######6   | 27.6M/41.5M [00:04<00:02, 6.95MB/s]
     69%|######8   | 28.4M/41.5M [00:04<00:02, 6.33MB/s]
     71%|#######1  | 29.5M/41.5M [00:05<00:01, 6.46MB/s]
     74%|#######3  | 30.6M/41.5M [00:05<00:01, 6.56MB/s]
     77%|#######6  | 31.8M/41.5M [00:05<00:01, 6.66MB/s]
     79%|#######9  | 32.9M/41.5M [00:05<00:01, 6.76MB/s]
     82%|####
 ####2 | 34.1M/41.5M [00:05<00:01, 6.85MB/s]
     85%|########5 | 35.3M/41.5M [00:05<00:00, 6.93MB/s]
     88%|########7 | 36.5M/41.5M [00:06<00:00, 7.03MB/s]
     91%|######### | 37.7M/41.5M [00:06<00:00, 7.12MB/s]
     94%|#########3| 38.9M/41.5M [00:06<00:00, 7.19MB/s]
     97%|#########6| 40.1M/41.5M [00:06<00:00, 7.27MB/s]
    100%|#########9| 41.3M/41.5M [00:06<00:00, 7.95MB/s]
    100%|##########| 41.5M/41.5M [00:06<00:00, 6.45MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<08:16, 87.5kB/s]
      0%|          | 40.0k/41.5M [00:00<06:24, 113kB/s] 
      0%|          | 96.0k/41.5M [00:00<03:36, 201kB/s]
      0%|          | 160k/41.5M [00:00<02:47, 259kB/s] 
      1%|          | 336k/41.5M [00:00<01:24, 511kB/s]
      1%|1         | 544k/41.5M [00:01<00:59, 722kB/s]
      3%|2         | 1.08M/41.5M [00:01<00:28, 1.48MB/s]
      5%|5         | 2.16M/41.5M [00:01<00:14, 2.92MB/s]
      9%|8         | 3.64M/41.5M [00:01<00:08, 4.60MB/s]
     12%|#2        | 5.12M/41.5M [00:01<00:06, 5.72MB/s]
     16%|#5        | 6.60M/41.5M [00:02<00:05, 6.50MB/s]
     19%|#9        | 8.09M/41.5M [00:02<00:04, 7.04MB/s]
     23%|##3       | 9.56M/41.5M [00:02<00:04, 7.40MB/s]
     27%|##6       | 11.0M/41.5M [00:02<00:04, 7.65MB/s]
     30%|###       | 12.5M/41.5M [00:02<00:03, 7.84MB/s]
     34%|###3      | 14.0M/41.5M [00:03<00:03, 7.96MB/s]
     37%|###7      | 15.5M/41.5M [00:03<00
 :03, 8.04MB/s]
     41%|####      | 17.0M/41.5M [00:03<00:03, 8.10MB/s]
     44%|####4     | 18.4M/41.5M [00:03<00:02, 8.15MB/s]
     48%|####8     | 19.9M/41.5M [00:03<00:02, 8.18MB/s]
     52%|#####1    | 21.4M/41.5M [00:03<00:02, 8.19MB/s]
     55%|#####5    | 22.9M/41.5M [00:04<00:02, 8.22MB/s]
     59%|#####8    | 24.4M/41.5M [00:04<00:02, 8.22MB/s]
     62%|######2   | 25.8M/41.5M [00:04<00:01, 8.23MB/s]
     66%|######5   | 27.3M/41.5M [00:04<00:01, 8.24MB/s]
     69%|######9   | 28.8M/41.5M [00:04<00:01, 8.24MB/s]
     73%|#######2  | 30.3M/41.5M [00:05<00:01, 8.23MB/s]
     77%|#######6  | 31.8M/41.5M [00:05<00:01, 8.23MB/s]
     80%|########  | 33.2M/41.5M [00:05<00:01, 8.25MB/s]
     84%|########3 | 34.7M/41.5M [00:05<00:00, 8.24MB/s]
     87%|########7 | 36.2M/41.5M [00:05<00:00, 8.25MB/s]
     91%|######### | 37.7M/41.5M [00:06<00:00, 8.25MB/s]
     94%|#########4| 39.2M/41.5M [00:06<00:00, 8.25MB/s]
     98%|#########7| 40.6M/41.5M [00:06<00:00, 8.24MB/s]
    100%|####
 ######| 41.5M/41.5M [00:06<00:00, 6.79MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_paddle.rst.txt b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
index c07482db7..4141d605c 100644
--- a/docs/_sources/how_to/compile_models/from_paddle.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
@@ -210,7 +210,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  18.358 seconds)
+   **Total running time of the script:** ( 1 minutes  6.280 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_paddle.py:
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 0822a61ac..5e07afa36 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -79,7 +79,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     45%|####4     | 19.9M/44.7M [00:00<00:00, 209MB/s]
     93%|#########3| 41.6M/44.7M [00:00<00:00, 220MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 220MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     34%|###4      | 15.2M/44.7M [00:00<00:00, 159MB/s]
     83%|########3 | 37.3M/44.7M [00:00<00:00, 201MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 200MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 139929571..1f57da32d 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -381,7 +381,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  4.604 seconds)
+   **Total running time of the script:** ( 1 minutes  8.900 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 3a628c1e4..d6e7c3395 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,15 +5,15 @@
 
 Computation times
 =================
-**05:42.222** total execution time for **how_to_compile_models** files:
+**05:32.395** total execution time for **how_to_compile_models** files:
 
-- **01:18.358**: :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)
-- **01:04.604**: :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``)
-- **00:59.131**: :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)
-- **00:33.456**: :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)
-- **00:24.167**: :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)
-- **00:24.139**: :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)
-- **00:22.248**: :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)
-- **00:19.924**: :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)
-- **00:13.662**: :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)
-- **00:02.534**: :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)
+- **01:08.900**: :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``)
+- **01:06.280**: :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)
+- **00:58.537**: :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)
+- **00:32.878**: :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)
+- **00:24.378**: :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)
+- **00:22.891**: :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)
+- **00:22.038**: :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)
+- **00:19.623**: :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)
+- **00:14.138**: :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)
+- **00:02.733**: :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 4f315ef5d..103da159f 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -402,7 +402,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.2313      16.2338      16.2924      16.1510       0.0399   
+      16.2210      16.2544      16.3771      15.9601       0.1360   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 91164a1ca..cf21bcb66 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -108,7 +108,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
     11%|#1        | 19.0M/170M [00:00<00:00, 200MB/s]
     25%|##4       | 42.0M/170M [00:00<00:00, 224MB/s]
     38%|###8      | 65.2M/170M [00:00<00:00, 233MB/s]
     53%|#####3    | 90.0M/170M [00:00<00:00, 244MB/s]
     68%|######8   | 116M/170M [00:00<00:00, 254MB/s] 
     83%|########3 | 141M/170M [00:00<00:00, 258MB/s]
     98%|#########7| 166M/170M [00:00<00:00, 259MB/s]
    100%|##########| 170M/170M [00:00<00:00, 250MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
     11%|#1        | 19.5M/170M [00:00<00:00, 205MB/s]
     27%|##7       | 46.2M/170M [00:00<00:00, 249MB/s]
     43%|####3     | 73.3M/170M [00:00<00:00, 265MB/s]
     58%|#####8    | 98.5M/170M [00:00<00:00, 247MB/s]
     73%|#######3  | 125M/170M [00:00<00:00, 256MB/s] 
     89%|########9 | 151M/170M [00:00<00:00, 264MB/s]
    100%|##########| 170M/170M [00:00<00:00, 253MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -262,7 +262,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  4.934 seconds)
+   **Total running time of the script:** ( 3 minutes  5.231 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index 02f9f07ed..7c35f02ec 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -187,7 +187,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 156MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 153MB/s]
 
 
 
@@ -353,7 +353,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.6083      90.3127      97.9939      90.1749       1.1198   
+      90.4766      90.4321      91.2257      90.2658       0.1759   
                
 
 
@@ -393,7 +393,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  8.829 seconds)
+   **Total running time of the script:** ( 1 minutes  8.638 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index ae362af71..5af5e2a23 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -360,7 +360,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      119.0403     118.8458     123.3791     118.2175      0.7745   
+      120.9448     120.9608     121.7090     119.9896      0.3720   
                
 
 
@@ -394,7 +394,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  58.903 seconds)
+   **Total running time of the script:** ( 1 minutes  54.339 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index b0ed4df7b..478337a7a 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -223,7 +223,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  22.221 seconds)
+   **Total running time of the script:** ( 1 minutes  15.411 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index b3e9cdf4f..4d9ccac33 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -137,7 +137,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5475/132723 [00:00<00:02, 54745.09KB/s]
     10%|9         | 13114/132723 [00:00<00:01, 67474.21KB/s]
     16%|#5        | 20717/132723 [00:00<00:01, 71377.65KB/s]
     21%|##1       | 28178/132723 [00:00<00:01, 72650.70KB/s]
     27%|##7       | 36266/132723 [00:00<00:01, 75610.49KB/s]
     33%|###3      | 44249/132723 [00:00<00:01, 77043.00KB/s]
     39%|###9      | 52130/132723 [00:00<00:01, 77617.80KB/s]
     45%|####5     | 60085/132723 [00:00<00:00, 78229.09KB/s]
     51%|#####1    | 68026/132723 [00:00<00:00, 78593.39KB/s]
     57%|#####7    | 76040/132723 [00:01<00:00, 79069.09KB/s]
     63%|######3   | 83947/132723 [00:01<00:00, 78828.58KB/s]
     69%|######9   | 91831/132723 [00:01<00:00, 78703.97KB/s]
     75%|#######5  | 99702/132723 [00:01<00:00, 61163.95KB/s]
     81%|########1 | 107529/132723 [00:01<00:00, 65458.19KB/s]
     87%|########7 | 115478/132723 [00:01<00:00, 69161.98KB/s]
     93%|#########
 3| 123516/132723 [00:01<00:00, 72232.38KB/s]
     99%|#########9| 131652/132723 [00:01<00:00, 74796.98KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 73288.21KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5503/132723 [00:00<00:02, 55023.49KB/s]
     10%|9         | 13158/132723 [00:00<00:01, 67681.39KB/s]
     15%|#5        | 19927/132723 [00:00<00:02, 49903.60KB/s]
     21%|##        | 27489/132723 [00:00<00:01, 58253.64KB/s]
     25%|##5       | 33751/132723 [00:00<00:01, 53655.50KB/s]
     31%|###1      | 41251/132723 [00:00<00:01, 59831.56KB/s]
     37%|###6      | 49083/132723 [00:00<00:01, 65238.45KB/s]
     43%|####2     | 56965/132723 [00:00<00:01, 69239.68KB/s]
     49%|####8     | 64857/132723 [00:01<00:00, 72105.14KB/s]
     55%|#####4    | 72739/132723 [00:01<00:00, 74095.80KB/s]
     61%|######    | 80634/132723 [00:01<00:00, 75541.82KB/s]
     67%|######6   | 88277/132723 [00:01<00:00, 70922.11KB/s]
     72%|#######2  | 96063/132723 [00:01<00:00, 72900.58KB/s]
     78%|#######8  | 103974/132723 [00:01<00:00, 74695.77KB/s]
     84%|########4 | 111854/132723 [00:01<00:00, 75895.29KB/s]
     90%|#########
  | 119783/132723 [00:01<00:00, 76893.64KB/s]
     96%|#########6| 127718/132723 [00:01<00:00, 77620.00KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 69911.00KB/s]
 
 
 
@@ -211,7 +211,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  22.745 seconds)
+   **Total running time of the script:** ( 2 minutes  23.592 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 6879d369c..4b5244460 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**10:51.548** total execution time for **how_to_deploy_models** files:
+**10:40.235** total execution time for **how_to_deploy_models** files:
 
-- **03:04.934**: :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
-- **02:22.745**: :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
-- **01:58.903**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
-- **01:22.221**: :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)
-- **01:08.829**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)
-- **00:30.761**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)
-- **00:22.948**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
-- **00:00.207**: :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)
+- **03:05.231**: :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
+- **02:23.592**: :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
+- **01:54.339**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
+- **01:15.411**: :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)
+- **01:08.638**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)
+- **00:30.284**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)
+- **00:22.527**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
+- **00:00.213**: :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 685b7157a..22470cb78 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -425,7 +425,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipa89ba127-910b-4223-b077-7869f3b71209 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip53471836-d01e-465a-9d91-6da1f00778d7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 9f2c5f855..307043d4c 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,9 +5,9 @@
 
 Computation times
 =================
-**00:42.181** total execution time for **how_to_extend_tvm** files:
+**00:40.820** total execution time for **how_to_extend_tvm** files:
 
-- **00:38.303**: :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
-- **00:02.499**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)
-- **00:01.157**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)
-- **00:00.222**: :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)
+- **00:37.072**: :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
+- **00:02.412**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)
+- **00:01.120**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)
+- **00:00.217**: :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 974c40863..3d7da36ce 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -199,10 +199,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6670us [6670us] (45.74%; 45.74%)
-    FoldScaleAxis: 7911us [7us] (54.26%; 54.26%)
-            FoldConstant: 7904us [1613us] (54.21%; 99.91%)
-                    InferType: 6291us [6291us] (43.14%; 79.59%)
+    InferType: 6619us [6619us] (45.79%; 45.79%)
+    FoldScaleAxis: 7836us [6us] (54.21%; 54.21%)
+            FoldConstant: 7830us [1592us] (54.17%; 99.93%)
+                    InferType: 6238us [6238us] (43.16%; 79.67%)
 
 
 
@@ -239,10 +239,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6425us [6425us] (44.75%; 44.75%)
-    FoldScaleAxis: 7931us [6us] (55.25%; 55.25%)
-            FoldConstant: 7925us [1635us] (55.20%; 99.92%)
-                    InferType: 6290us [6290us] (43.82%; 79.37%)
+    InferType: 6322us [6322us] (44.82%; 44.82%)
+    FoldScaleAxis: 7784us [5us] (55.18%; 55.18%)
+            FoldConstant: 7779us [1610us] (55.15%; 99.94%)
+                    InferType: 6169us [6169us] (43.73%; 79.30%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 514348cb6..45ed64d51 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -295,7 +295,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 34.720307 ms
+    Convolution: 44.037937 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 8af922a60..ab3f24ee6 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -628,7 +628,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 8.761867 ms
+    conv2d with tensor core: 13.268366 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index 2b3bd1d1b..be25c8a7a 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -118,8 +118,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.019098
-    Baseline: 3.258288
+    Numpy running time: 0.019966
+    Baseline: 3.460861
 
 
 
@@ -210,7 +210,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.317547
+    Opt1: 0.320441
 
 
 
@@ -309,7 +309,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.344534
+    Opt2: 0.348261
 
 
 
@@ -401,7 +401,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.119541
+    Opt3: 0.122005
 
 
 
@@ -520,7 +520,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.111624
+    Opt4: 0.111262
 
 
 
@@ -638,7 +638,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.112169
+    Opt5: 0.112845
 
 
 
@@ -759,7 +759,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.145835
+    Opt6: 0.144978
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 460a1012e..13b0b9db5 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:35.067** total execution time for **how_to_optimize_operators** files:
+**00:36.029** total execution time for **how_to_optimize_operators** files:
 
-- **00:32.377**: :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)
-- **00:01.461**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
-- **00:01.229**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)
+- **00:33.200**: :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)
+- **00:01.548**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
+- **00:01.281**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 61d22a23b..8d5cfce2b 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
 
 Computation times
 =================
-**05:31.927** total execution time for **how_to_tune_with_autoscheduler** files:
-
-- **02:47.014**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
-- **01:22.372**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)
-- **00:44.176**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)
-- **00:19.848**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)
-- **00:09.562**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)
-- **00:08.955**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)
+**05:29.427** total execution time for **how_to_tune_with_autoscheduler** files:
+
+- **02:48.697**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
+- **01:21.538**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)
+- **00:43.895**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)
+- **00:17.124**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)
+- **00:09.329**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)
+- **00:08.845**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index d1820c51b..ba7d3dc37 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -222,43 +222,484 @@ cooperative fetching, unrolling and operator fusion.
                  compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 112;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [84]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [384]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [1], [], scope="local", align=4)[0] = 0f32
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 28;
+      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[2] = 0f32
         conv2d_nchw_1[3] = 0f32
-        for (rc.outer.outer: int32, 0, 128) {
-          for (rx.outer.outer: int32, 0, 3) {
-            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            for (ax0.ax1.fused.ax2.fused.ax3.fused.inner.s: int32, 0, 3) {
-              if @tir.likely((threadIdx.x_1 < 28), dtype=bool) {
-                pad_temp.shared_1: Buffer(pad_temp.shared, float32, [84], [], scope="shared")[((threadIdx.x_1*3) + ax0.ax1.fused.ax2.fused.ax3.fused.inner.s)] = @tir.if_then_else(((((1 <= (floordiv(floormod(((threadIdx.x_1*3) + ax0.ax1.fused.ax2.fused.ax3.fused.inner.s), 21), 7) + floormod(blockIdx.x, 7))) && ((floordiv(floormod(((threadIdx.x_1*3) + ax0.ax1.fused.ax2.fused.ax3.fused.inner.s), 21), 7) + floormod(blockIdx.x, 7)) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*3)  [...]
-              }
-            }
-            for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 7) {
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-              if @tir.likely((((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*7) + floordiv(threadIdx.x_2, 8)) < 48), dtype=bool) {
-                kernel.shared_1: Buffer(kernel.shared, float32, [384], [], scope="shared")[((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2)] = kernel[((((((floordiv(blockIdx.x, 7)*147456) + (floordiv(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*14) + floordiv(threadIdx.x_2, 4)), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + threadIdx.x_2), 12), 3)*9)) + (floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer. [...]
-              }
-            }
-            for (rc.inner: int32, 0, 4) {
-              for (ry.inner: int32, 0, 3) {
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner)]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner) + 96)]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner) + 192)]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner) + 288)]))
+        conv2d_nchw_1[4] = 0f32
+        conv2d_nchw_1[5] = 0f32
+        conv2d_nchw_1[6] = 0f32
+        conv2d_nchw_1[7] = 0f32
+        conv2d_nchw_1[8] = 0f32
+        conv2d_nchw_1[9] = 0f32
+        conv2d_nchw_1[10] = 0f32
+        conv2d_nchw_1[11] = 0f32
+        conv2d_nchw_1[12] = 0f32
+        conv2d_nchw_1[13] = 0f32
+        for (rc.outer.outer: int32, 0, 64) {
+          for (ry.outer.outer: int32, 0, 3) {
+            let cse_var_2: int32 = (rc.outer.outer*72)
+            let cse_var_1: int32 = (ry.outer.outer*3)
+             {
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope="shared")[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f3 [...]
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
+                }
               }
+              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope="shared")[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 8), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 16), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 32), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 32), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 64), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 40), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 80), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 56), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 112), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 64), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 128), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 80), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 160), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 88), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 176), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 104), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 208), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 112), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 224), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 128), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 256), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 136), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 272), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 152), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 304), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 160), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 320), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 176), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 352), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 184), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 368), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 200), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 400), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 208), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 416), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 224), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 448), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 232), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 464), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 248), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 496), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 256), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 512), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 272), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 544), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 280), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 560), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 296), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 592), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 304), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 608), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 320), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 640), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 328), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 656), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 344), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 688), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 352), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 704), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 368), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 736), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 376), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 752), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
             }
           }
         }
-        compute[((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[0] + bias[((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7))]), 0f32)
-        compute[(((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 392)] = max((conv2d_nchw_1[1] + bias[(((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7)) + 8)]), 0f32)
-        compute[(((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 784)] = max((conv2d_nchw_1[2] + bias[(((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7)) + 16)]), 0f32)
-        compute[(((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 1176)] = max((conv2d_nchw_1[3] + bias[(((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7)) + 24)]), 0f32)
+        for (i1.inner: int32, 0, 2) {
+          for (i3.inner: int32, 0, 7) {
+            compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
+          }
+        }
       }
     }
 
@@ -310,7 +751,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.389 ms
+    Execution time of this operator: 0.358 ms
 
 
 
@@ -355,35 +796,35 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
-    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=4)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
     conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=4)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=1)
-    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
+    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
     conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
-    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=4)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
     compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -403,14 +844,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=3)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 0)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -428,42 +869,431 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[4];
-      __shared__ float pad_temp_shared[84];
-      __shared__ float kernel_shared[384];
+    extern "C" __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[14];
+      __shared__ float pad_temp_shared[72];
+      __shared__ float kernel_shared[3072];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
       conv2d_nchw[3] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 128; ++rc_outer_outer) {
-        for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
+      conv2d_nchw[4] = 0.000000e+00f;
+      conv2d_nchw[5] = 0.000000e+00f;
+      conv2d_nchw[6] = 0.000000e+00f;
+      conv2d_nchw[7] = 0.000000e+00f;
+      conv2d_nchw[8] = 0.000000e+00f;
+      conv2d_nchw[9] = 0.000000e+00f;
+      conv2d_nchw[10] = 0.000000e+00f;
+      conv2d_nchw[11] = 0.000000e+00f;
+      conv2d_nchw[12] = 0.000000e+00f;
+      conv2d_nchw[13] = 0.000000e+00f;
+      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
+        for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
           __syncthreads();
-          for (int ax0_ax1_fused_ax2_fused_ax3_fused_inner_s = 0; ax0_ax1_fused_ax2_fused_ax3_fused_inner_s < 3; ++ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) {
-            if (((int)threadIdx.x) < 28) {
-              pad_temp_shared[((((int)threadIdx.x) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s)] = (((((1 <= (((((((int)threadIdx.x) % 7) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) / 7) + (((int)blockIdx.x) % 7))) && ((((((((int)threadIdx.x) % 7) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) / 7) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) % 7)))) && ((rx_outer_outer + (((((int)threadI [...]
-            }
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 9))) && (((((int)threadIdx.x) * 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
           }
-          for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer < 7; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) {
-            if (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 7) + (((int)threadIdx.x) >> 3)) < 48) {
-              kernel_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 56) + ((int)threadIdx.x))] = kernel[(((((((((int)blockIdx.x) / 7) * 147456) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 14) + (((int)threadIdx.x) >> 2)) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 8) + ((int)threadIdx.x)) % 12) / 3) * 9)) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 2) + ((int)threadIdx.x)) % 3) * 3)) + rx_outer_outer)];
-            }
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 9))) && ((((((int)threadIdx.x) * 4) + 1) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
           }
-          __syncthreads();
-          for (int rc_inner = 0; rc_inner < 4; ++rc_inner) {
-            for (int ry_inner = 0; ry_inner < 3; ++ry_inner) {
-              conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner)]));
-              conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner) + 96)]));
-              conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner) + 192)]));
-              conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner) + 288)]));
-            }
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 9))) && ((((((int)threadIdx.x) * 4) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
           }
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 9))) && ((((((int)threadIdx.x) * 4) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
+          }
+          kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
+          kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
+          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
+          kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
+          kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
+          kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
+          kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+          kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
+          kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
+          kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
+          kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
+          kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
+          kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
+          kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
+          kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
+          kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          __syncthreads();
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+        }
+      }
+      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
+        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
+          compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
         }
       }
-      compute[(((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[0] + bias[(((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
-      compute[((((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7)) + 392)] = max((conv2d_nchw[1] + bias[((((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7)) + 8)]), 0.000000e+00f);
-      compute[((((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7)) + 784)] = max((conv2d_nchw[2] + bias[((((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7)) + 16)]), 0.000000e+00f);
-      compute[((((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7)) + 1176)] = max((conv2d_nchw[3] + bias[((((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7)) + 24)]), 0.000000e+00f);
     }
 
 
@@ -514,14 +1344,14 @@ In the example below we resume the status and do more 5 trials.
     /usr/local/lib/python3.7/dist-packages/xgboost/training.py:17: UserWarning: Old style callback is deprecated.  See: https://xgboost.readthedocs.io/en/latest/python/callbacks.html
       warnings.warn(f'Old style callback is deprecated.  See: {link}', UserWarning)
     Get devices for measurement successfully!
-
+    .T
 
 
 
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  47.014 seconds)
+   **Total running time of the script:** ( 2 minutes  48.697 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index 07417fd8f..a25a0ab78 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -616,7 +616,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       9.8004       9.8002       9.8580       9.7429       0.0470   
+       9.8616       9.8832       9.8934       9.8081       0.0380   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index 0691936ae..18efd8c4f 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -635,7 +635,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      765.6064     766.6715     766.8813     763.2664      1.6568   
+      769.3796     770.1538     771.6261     766.3590      2.2188   
                
 
 
@@ -660,7 +660,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  22.372 seconds)
+   **Total running time of the script:** ( 1 minutes  21.538 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index b35b4c064..2be92cd6c 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -362,14 +362,14 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {placeholder_8: placeholder_15: Buffer(placeholder_13, int32, [33], []), placeholder_6: placeholder_16: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_5: placeholder_17: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_18: Buffer(placeholder_12, int32, [4916], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
-      for (i0.outer.i1.outer.fused: int32, 0, 64) "parallel" {
-        allocate(compute_4: Pointer(global float32), float32, [1024]), storage_scope = global {
-          for (i.outer.inner: int32, 0, 4) {
+      preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_16: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_6: placeholder_18: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
+      for (i0.outer.i1.outer.fused: int32, 0, 128) "parallel" {
+        allocate(compute_4: Pointer(global float32), float32, [512]), storage_scope = global {
+          for (nb_j.inner: int32, 0, 2) {
             for (i.inner.init: int32, 0, 16) {
-              let cse_var_1: int32 = ((i.outer.inner*256) + (i.inner.init*16))
+              let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
                {
-                compute_5: Buffer(compute_4, float32, [1024], [])[cse_var_1] = 0f32
+                compute_5: Buffer(compute_4, float32, [512], [])[cse_var_1] = 0f32
                 compute_5[(cse_var_1 + 1)] = 0f32
                 compute_5[(cse_var_1 + 2)] = 0f32
                 compute_5[(cse_var_1 + 3)] = 0f32
@@ -387,81 +387,51 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                 compute_5[(cse_var_1 + 15)] = 0f32
               }
             }
-            for (elem_idx: int32, 0, let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+            for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
               for (i.inner: int32, 0, 16) {
-                let cse_var_3: int32 = floormod(i0.outer.i1.outer.fused, 32)
+                let cse_var_21: int32 = (elem_idx*16)
+                let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
+                let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+                let cse_var_18: int32 = (cse_var_20 + 1)
+                let cse_var_17: int32 = (cse_var_20 + 11)
+                let cse_var_16: int32 = (cse_var_20 + 12)
+                let cse_var_15: int32 = (cse_var_20 + 13)
+                let cse_var_14: int32 = (cse_var_20 + 14)
+                let cse_var_13: int32 = (cse_var_20 + 15)
+                let cse_var_12: int32 = (cse_var_20 + 2)
+                let cse_var_11: int32 = (cse_var_20 + 3)
+                let cse_var_10: int32 = (cse_var_20 + 4)
+                let cse_var_9: int32 = (cse_var_20 + 5)
+                let cse_var_8: int32 = (cse_var_20 + 6)
+                let cse_var_7: int32 = (cse_var_20 + 7)
+                let cse_var_6: int32 = (cse_var_20 + 8)
+                let cse_var_5: int32 = (cse_var_20 + 9)
+                let cse_var_4: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.inner*256))
+                let cse_var_3: int32 = (cse_var_20 + 10)
                  {
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_4: int32 = ((i.outer.inner*256) + (i.inner*16))
-                    compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[((placeholder_3[cse_var_3]*16) + (elem_idx*16))]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_5: int32 = (((i.outer.inner*256) + (i.inner*16)) + 1)
-                    compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 1)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_6: int32 = (((i.outer.inner*256) + (i.inner*16)) + 2)
-                    compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 2)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_7: int32 = (((i.outer.inner*256) + (i.inner*16)) + 3)
-                    compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 3)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_8: int32 = (((i.outer.inner*256) + (i.inner*16)) + 4)
-                    compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 4)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_9: int32 = (((i.outer.inner*256) + (i.inner*16)) + 5)
-                    compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 5)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_10: int32 = (((i.outer.inner*256) + (i.inner*16)) + 6)
-                    compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 6)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_11: int32 = (((i.outer.inner*256) + (i.inner*16)) + 7)
-                    compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 7)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_12: int32 = (((i.outer.inner*256) + (i.inner*16)) + 8)
-                    compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 8)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_13: int32 = (((i.outer.inner*256) + (i.inner*16)) + 9)
-                    compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 9)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_14: int32 = (((i.outer.inner*256) + (i.inner*16)) + 10)
-                    compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 10)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_15: int32 = (((i.outer.inner*256) + (i.inner*16)) + 11)
-                    compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 11)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_16: int32 = (((i.outer.inner*256) + (i.inner*16)) + 12)
-                    compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 12)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_17: int32 = (((i.outer.inner*256) + (i.inner*16)) + 13)
-                    compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 13)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_18: int32 = (((i.outer.inner*256) + (i.inner*16)) + 14)
-                    compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 14)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
-                  if @tir.likely((elem_idx < (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                    let cse_var_19: int32 = (((i.outer.inner*256) + (i.inner*16)) + 15)
-                    compute_5[cse_var_19] = (compute_5[cse_var_19] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 15)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-                  }
+                  compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 64) {
-            let cse_var_20: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*32768) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
-            compute[ramp(cse_var_20, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_20, 1, 16)]), broadcast(0f32, 16))
+          for (i0.inner: int32, 0, 16) {
+            let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+            compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
           }
         }
       }
@@ -515,7 +485,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 2.128 ms
+    Execution time of this operator: 1.730 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index 1a7e6f627..8e02c8e34 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:44.499** total execution time for **how_to_tune_with_autotvm** files:
+**00:44.710** total execution time for **how_to_tune_with_autotvm** files:
 
-- **00:43.564**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
+- **00:43.780**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
 - **00:00.245**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
-- **00:00.231**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
-- **00:00.230**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
-- **00:00.229**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:00.229**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
+- **00:00.228**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:00.228**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 44b5d3b69..a82f93d1d 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -859,8 +859,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
-    No: 6   GFLOPS: 42.74/42.74     result: MeasureResult(costs=(0.005416478736842105,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5318071842193604, timestamp=1654629038.5398705)       [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
-    No: 7   GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 6   GFLOPS: 92.57/92.57     result: MeasureResult(costs=(0.0025007746875,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7914223670959473, timestamp=1654630725.560318)     [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+    No: 7   GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -983,7 +983,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
-    No: 8   GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 8   GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1106,7 +1106,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
-    No: 9   GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 9   GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1229,7 +1229,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
-    No: 10  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 10  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1247,7 +1247,7 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
-    No: 11  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 11  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1370,7 +1370,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
-    No: 12  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 12  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1493,7 +1493,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
-    No: 13  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1616,7 +1616,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
-    No: 14  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1739,7 +1739,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
-    No: 15  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 15  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1862,7 +1862,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
-    No: 16  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 16  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1985,7 +1985,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
-    No: 17  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 17  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -2108,7 +2108,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
-    No: 18  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 18  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -2231,7 +2231,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
-    No: 19  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+    No: 19  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 721, in __call__
         yield remote, remote.load_module(os.path.split(build_result.filename)[1])
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 685, in run_through_rpc
@@ -2319,7 +2319,7 @@ for this template
       15: _PyEval_EvalFrameDefault
       14: 0x0000000000537c30
       13: _PyObject_FastCallKeywords
-      12: 0x00007f821e27efa2
+      12: 0x00007fe1f76a0fa2
       11: _ctypes_callproc
       10: ffi_call
       9: ffi_call_unix64
@@ -2384,7 +2384,7 @@ for this template
       21: _PyFunction_FastCallKeywords
       20: _PyEval_EvalFrameDefault
       19: _PyFunction_FastCall      [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
-    No: 20  GFLOPS: 144.44/144.44   result: MeasureResult(costs=(0.0016027322899999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4423887729644775, timestamp=1654629065.150454)       [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+    No: 20  GFLOPS: 144.83/144.83   result: MeasureResult(costs=(0.0015984026900000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4392552375793457, timestamp=1654630752.1502788)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
 
 
 
@@ -2437,7 +2437,7 @@ and measure running time.
 
     Best config:
     [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
-    Time cost of this operator: 0.001978
+    Time cost of this operator: 0.001994
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 3a36044b3..6fd1890d4 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -294,10 +294,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.7     98.722   (1, 2, 10, 10, 3)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.106     0.981    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.941     0.297    (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             316.747   -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  314.7     98.713   (1, 2, 10, 10, 3)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.172     0.995    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.932     0.292    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             318.804   -        -                  -       -        
 
 
 
@@ -359,10 +359,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  79.75     96.818   (1, 6, 10, 10, 1)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.702     2.066    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.919     1.116    (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             82.371    -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  79.65     96.768   (1, 6, 10, 10, 1)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.753     2.13     (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.907     1.102    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             82.311    -        -                  -       -        
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index d0831309b..3a29e250d 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -297,8 +297,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmp5yutqdte/images/target contains 8144 images
-    /tmp/tmp5yutqdte/images/random contains 5000 images
+    /tmp/tmp4fxlxmsm/images/target contains 8144 images
+    /tmp/tmp4fxlxmsm/images/random contains 5000 images
 
 
 
@@ -459,11 +459,11 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 55s - loss: 0.2116 - accuracy: 0.9278 - val_loss: 0.1487 - val_accuracy: 0.9509
+    328/328 - 55s - loss: 0.2282 - accuracy: 0.9238 - val_loss: 0.1515 - val_accuracy: 0.9588
     Epoch 2/3
-    328/328 - 52s - loss: 0.1028 - accuracy: 0.9606 - val_loss: 0.1062 - val_accuracy: 0.9637
+    328/328 - 52s - loss: 0.1016 - accuracy: 0.9620 - val_loss: 0.1071 - val_accuracy: 0.9653
     Epoch 3/3
-    328/328 - 52s - loss: 0.0638 - accuracy: 0.9754 - val_loss: 0.1429 - val_accuracy: 0.9547
+    328/328 - 52s - loss: 0.0633 - accuracy: 0.9756 - val_loss: 0.1160 - val_accuracy: 0.9630
 
 
 
@@ -825,7 +825,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 4 minutes  26.229 seconds)
+   **Total running time of the script:** ( 4 minutes  9.231 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index bb4d80a9c..cbad1eb45 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
 
 Computation times
 =================
-**05:14.025** total execution time for **how_to_work_with_microtvm** files:
+**04:57.098** total execution time for **how_to_work_with_microtvm** files:
 
-- **04:26.229**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)
-- **00:43.453**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)
-- **00:03.691**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)
-- **00:00.225**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)
+- **04:09.231**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)
+- **00:43.432**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)
+- **00:03.791**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)
 - **00:00.215**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``)
-- **00:00.213**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tvmc.py` (``micro_tvmc.py``)
+- **00:00.215**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)
+- **00:00.214**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tvmc.py` (``micro_tvmc.py``)
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 3a221d8cf..d8db4672b 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:12.024** total execution time for **how_to_work_with_relay** files:
+**00:12.240** total execution time for **how_to_work_with_relay** files:
 
-- **00:09.997**: :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)
-- **00:01.779**: :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)
-- **00:00.248**: :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)
+- **00:10.232**: :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)
+- **00:01.776**: :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)
+- **00:00.231**: :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 546c03916..10ab2106c 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**00:05.793** total execution time for **how_to_work_with_schedules** files:
+**00:05.947** total execution time for **how_to_work_with_schedules** files:
 
-- **00:02.085**: :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)
-- **00:01.152**: :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)
-- **00:00.737**: :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)
-- **00:00.733**: :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)
-- **00:00.324**: :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)
-- **00:00.266**: :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``)
-- **00:00.259**: :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)
-- **00:00.238**: :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)
+- **00:02.154**: :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)
+- **00:01.200**: :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)
+- **00:00.761**: :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)
+- **00:00.743**: :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)
+- **00:00.332**: :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)
+- **00:00.262**: :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``)
+- **00:00.255**: :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)
+- **00:00.241**: :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index a2f1df9e5..9240ba80b 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -318,7 +318,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpellhfz3e/input0.cc'\nsource_filename = \"/tmp/tmpellhfz3e/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmp5wlj63qn/input0.cc'\nsource_filename = \"/tmp/tmp5wlj63qn/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index f9a4dc2ee..fa0956344 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:22.006** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:22.038** total execution time for **topic_vta_tutorials_autotvm** files:
 
-- **00:21.783**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
-- **00:00.224**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)
+- **00:21.818**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
+- **00:00.219**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index afc132041..569c4600b 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -267,7 +267,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 23.64s!
+    resnet18_v1 inference graph built in 22.95s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index 6e412929e..830a8d8fa 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -303,7 +303,7 @@ The compilation steps are:
       "target_host parameter is going to be deprecated. "
     /workspace/python/tvm/relay/build_module.py:389: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 16.26s!
+    yolov3-tiny inference graph built in 15.78s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index e58e6f4a9..4c0020c75 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**01:32.773** total execution time for **topic_vta_tutorials_frontend** files:
+**01:32.403** total execution time for **topic_vta_tutorials_frontend** files:
 
-- **00:48.571**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
-- **00:44.201**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
+- **00:48.690**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
+- **00:43.713**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 4b83115f9..870ad2f05 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:03.696** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.680** total execution time for **topic_vta_tutorials_optimize** files:
 
-- **00:03.095**: :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
-- **00:00.601**: :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
+- **00:03.071**: :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
+- **00:00.609**: :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index 8b7ee5af8..d651fa9fa 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:01.104** total execution time for **topic_vta_tutorials** files:
+**00:01.128** total execution time for **topic_vta_tutorials** files:
 
-- **00:00.568**: :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
-- **00:00.536**: :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.577**: :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
+- **00:00.551**: :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index c77d9aed9..33f98f232 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -306,7 +306,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 93.762 ms
+    Execution time of this operator: 94.109 ms
 
 
 
@@ -415,6 +415,11 @@ Expression (TE) language that demonstrates how TVM can optimize computational
 operations.
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 1 minutes  7.133 seconds)
+
+
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index dbd4d1f7b..f361f1e66 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -280,7 +280,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 499.62329165000483, 'median': 499.4997022500115, 'std': 0.6567413833476213}
+    {'mean': 498.73701694999, 'median': 498.59961209997437, 'std': 0.580608214157236}
 
 
 
@@ -494,31 +494,31 @@ the tuning data to.
 
  .. code-block:: none
 
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.40/  17.40 GFLOPS | Progress: (4/20) | 6.04 s
    [Task  1/25]  Current/Best:    6.16/  17.40 GFLOPS | Progress: (8/20) | 9.01 s
    [Task  1/25]  Current/Best:   11.43/  22.70 GFLOPS | Progress: (12/20) | 11.49 s
    [Task  1/25]  Current/Best:   16.78/  22.70 GFLOPS | Progress: (16/20) | 13.19 s
    [Task  1/25]  Current/Best:   11.44/  23.65 GFLOPS | Progress: (20/20) | 14.94 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.30/  13.15 GFLOPS | Progress: (4/20) | 3.89 s
    [Task  2/25]  Current/Best:   14.05/  17.46 GFLOPS | Progress: (8/20) | 5.20 s
    [Task  2/25]  Current/Best:   20.91/  20.91 GFLOPS | Progress: (12/20) | 6.53 s
    [Task  2/25]  Current/Best:   12.18/  20.91 GFLOPS | Progress: (16/20) | 7.79 s
    [Task  2/25]  Current/Best:   19.19/  20.91 GFLOPS | Progress: (20/20) | 9.43 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.54 GFLOPS | Progress: (4/20) | 5.82 s
    [Task  3/25]  Current/Best:   15.50/  16.86 GFLOPS | Progress: (8/20) | 7.75 s
    [Task  3/25]  Current/Best:   14.86/  16.86 GFLOPS | Progress: (12/20) | 9.49 s
    [Task  3/25]  Current/Best:    7.21/  23.55 GFLOPS | Progress: (16/20) | 11.38 s
    [Task  3/25]  Current/Best:   12.53/  23.55 GFLOPS | Progress: (20/20) | 15.97 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.42/  19.52 GFLOPS | Progress: (4/20) | 2.39 s
    [Task  4/25]  Current/Best:    6.77/  19.52 GFLOPS | Progress: (8/20) | 7.19 s
    [Task  4/25]  Current/Best:   21.56/  21.56 GFLOPS | Progress: (12/20) | 12.18 s
    [Task  4/25]  Current/Best:   16.97/  21.56 GFLOPS | Progress: (16/20) | 14.64 s
    [Task  4/25]  Current/Best:   13.16/  21.56 GFLOPS | Progress: (20/20) | 16.63 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.75/  10.07 GFLOPS | Progress: (4/20) | 2.56 s
    [Task  5/25]  Current/Best:   11.80/  13.28 GFLOPS | Progress: (8/20) | 4.62 s
    [Task  5/25]  Current/Best:    9.48/  17.98 GFLOPS | Progress: (12/20) | 7.88 s
    [Task  5/25]  Current/Best:   11.82/  22.52 GFLOPS | Progress: (16/20) | 9.30 s
    [Task  5/25]  Current/Best:   11.74/  22.52 GFLOPS | Progress: (20/20) | 11.26 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.25/  20.61 GFLOPS | Progress: (4/20) | 4.08 s
    [Task  6/25]  Current/Best:   18.82/  20.61 GFLOPS | Progress: (8/20) | 5.85 s
    [Task  6/25]  Current/Best:   13.14/  20.61 GFLOPS | Progress: (12/20) | 7.81 s
    [Task  6/25]  Current/Best:   19.89/  20.61 GFLOPS | Progress: (16/20) | 10.10 s
    [Task  6/25]  Current/Best:    3.76/  20.61 GFLOPS | Progress: (20/20) | 12.64 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.00/  12.42 GFLOPS | Progress: (4/20) | 3.56 s
    [Task  7/25]  Current/Best:   20.11/  21.18 GFLOPS | Progress: (8/20) | 5.10 s
    [Task  7/25]  Current/Best:   15.81/  21.18 GFLOPS | Progress: (12/20) | 7.06 s
    [Task  7/25]  Current/Best:   12.25/  21.18 GFLOPS | Progress: (16/20) | 9.12 s
    [Task  7/25]  Current/Best:    6.34/  21.70 GFLOPS | Progress: (20/20) | 11.59 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.40/  13.83 GFLOPS | Progress: (4/20) | 2.89 s
    [Task  8/25]  Current/Best:    9.94/  13.83 GFLOPS | Progress: (8/20) | 7.99 s
    [Task  8/25]  Current/Best:   13.03/  13.94 GFLOPS | Progress: (12/20) | 14.70 s
    [Task  8/25]  Current/Best:   18.96/  18.96 GFLOPS | Progress: (16/20) | 16.77 s
    [Task  8/25]  Current/Best:   20.34/  20.34 GFLOPS | Progress: (20/20) | 23.85 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.25/  15.52 GFLOPS | Progress: (4/20) | 11.92 s
    [Task  9/25]  Current/Best:   23.35/  23.35 GFLOPS | Progress: (8/20) | 13.69 s
    [Task  9/25]  Current/Best:    8.21/  23.35 GFLOPS | Progress: (12/20) | 16.23 s
    [Task  9/25]  Current/Best:   17.68/  23.35 GFLOPS | Progress: (16/20) | 19.08 s
    [Task  9/25]  Current/Best:    8.95/  23.35 GFLOPS | Progress: (20/20) | 27.79 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.29/  18.29 GFLOPS | Progress: (4/20) | 2.51 s
    [Task 10/25]  Current/Best:   15.36/  18.29 GFLOPS | Progress: (8/20) | 4.16 s
    [Task 10/25]  Current/Best:   11.66/  18.98 GFLOPS | Progress: (12/20) | 5.73 s
    [Task 10/25]  Current/Best:   18.83/  20.36 GFLOPS | Progress: (16/20) | 6.84 s
    [Task 10/25]  Current/Best:    9.01/  20.36 GFLOPS | Progress: (20/20
 ) | 8.37 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.25/  18.04 GFLOPS | Progress: (4/20) | 3.28 s
    [Task 11/25]  Current/Best:   16.63/  18.04 GFLOPS | Progress: (8/20) | 6.07 s
    [Task 11/25]  Current/Best:   18.12/  18.12 GFLOPS | Progress: (12/20) | 8.11 s
    [Task 11/25]  Current/Best:   13.42/  21.03 GFLOPS | Progress: (16/20) | 11.07 s
    [Task 11/25]  Current/Best:   19.38/  21.51 GFLOPS | Progress: (20/20) | 13.16 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.80/  18.10 GFLOPS | Progress: (4/20) | 5.69 s
    [Task 12/25]  Current/Best:    5.30/  18.10 GFLOPS | Progress: (8/20) | 9.62 s
    [Task 12/25]  Current/Best:   18.68/  18.99 GFLOPS | Progress: (12/20) | 11.61 s
    [Task 12/25]  Current/Best:   14.75/  18.99 GFLOPS | Progress: (16/20) | 14.52 s
    [Task 12/25]  Current/Best:   15.26/  19.06 GFLOPS | Progress: (20/20) | 16.43 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.49/  17.29 GFLOPS | Progress: (4/20) | 3.73 s
    [Task 13/25]  Current/Best:   16.07/  20.78 GFLOPS | Progress: (8/20) | 6.32 s
    [Task 13/25]  Current/Best:   19.20/  20.78 GFLOPS | Progress: (12/20) | 9.45 s
    [Task 13/25]  Current/Best:   12.20/  20.78 GFLOPS | Progress: (16/20) | 12.95 s
    [Task 13/25]  Current/Best:   18.60/  20.78 GFLOPS | Progress: (20/20) | 15.25 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.51/  13.51 GFLOPS | Progress: (4/20) | 3.38 s
    [Task 14/25]  Current/Best:    6.12/  13.51 GFLOPS | Progress: (8/20) | 5.58 s
    [Task 14/25]  Current/Best:   20.67/  20.67 GFLOPS | Progress: (12/20) | 8.27 s
    [Task 14/25]  Current/Best:   16.97/  20.67 GFLOPS | Progress: (16/20) | 9.93 s Done.
-
    [Task 14/25]  Current/Best:   17.38/  20.67 GFLOPS | Progress: (20/20) | 11.66 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.00/  17.50 GFLOPS | Progress: (4/20) | 2.64 s
    [Task 15/25]  Current/Best:   14.35/  17.82 GFLOPS | Progress: (8/20) | 3.99 s
    [Task 15/25]  Current/Best:   10.38/  22.29 GFLOPS | Progress: (12/20) | 6.21 s
    [Task 15/25]  Current/Best:   20.35/  22.29 GFLOPS | Progress: (16/20) | 9.98 s
    [Task 15/25]  Current/Best:    9.67/  22.29 GFLOPS | Progress: (20/20) | 11.00 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.18/  20.18 GFLOPS | Progress: (4/20) | 2.94 s
    [Task 16/25]  Current/Best:    3.04/  20.18 GFLOPS | Progress: (8/20) | 4.58 s
    [Task 16/25]  Current/Best:   19.09/  20.18 GFLOPS | Progress: (12/20) | 5.80 s
    [Task 16/25]  Current/Best:   17.82/  20.18 GFLOPS | Progress: (16/20) |
  7.18 s
    [Task 16/25]  Current/Best:   10.02/  21.99 GFLOPS | Progress: (20/20) | 9.37 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.65/  18.40 GFLOPS | Progress: (4/20) | 4.75 s
    [Task 17/25]  Current/Best:   14.24/  23.03 GFLOPS | Progress: (8/20) | 7.58 s
    [Task 17/25]  Current/Best:   16.71/  23.03 GFLOPS | Progress: (12/20) | 9.63 s
    [Task 17/25]  Current/Best:   16.46/  23.03 GFLOPS | Progress: (16/20) | 11.92 s
    [Task 17/25]  Current/Best:    9.99/  23.03 GFLOPS | Progress: (20/20) | 14.09 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.32/  17.76 GFLOPS | Progress: (4/20) | 3.75 s
    [Task 18/25]  Current/Best:   10.57/  18.55 GFLOPS | Progress: (8/20) | 7.45 s
    [Task 18/25]  Current/Best:   19.41/  19.41 GFLOPS | Progress: (12/20) | 9.40 s
    [Task 18/25]  Current/Best:    9.95/  19.41 GFLOPS | Progress: (16/20) | 13.25 s
    [Task 18/25]  Current/Best:   19.74/  19.74 GFLOPS | Progress: (20/20) | 14.79 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    6.37/  20.16 GFLOPS | Progress: (4/20) | 6.20 s
    [Task 19/25]  Current/Best:    2.60/  20.16 GFLOPS | Progress: (8/20) | 9.60 s
    [Task 19/25]  Current/Best:   18.95/  20.79 GFLOPS | Progress: (12/20) | 12.55 s
    [Task 19/25]  Current/Best:   15.23/  21.15 GFLOPS | Progress: (16/20) | 15.54 s
    [Task 19/25]  Current/Best:    2.70/  22.98 GFLOPS | Progress: (20/20) | 18.33 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.97/  15.10 GFLOPS | Progress: (4/20) | 3.35 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.45/  17.45 GFLOPS | Progress: (4/20) | 6.05 s
    [Task  1/25]  Current/Best:    6.15/  17.45 GFLOPS | Progress: (8/20) | 9.05 s
    [Task  1/25]  Current/Best:   11.49/  22.47 GFLOPS | Progress: (12/20) | 11.52 s
    [Task  1/25]  Current/Best:   16.70/  22.47 GFLOPS | Progress: (16/20) | 13.22 s
    [Task  1/25]  Current/Best:   11.61/  23.77 GFLOPS | Progress: (20/20) | 14.97 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.28/  13.08 GFLOPS | Progress: (4/20) | 3.81 s
    [Task  2/25]  Current/Best:   13.94/  17.98 GFLOPS | Progress: (8/20) | 5.12 s
    [Task  2/25]  Current/Best:   20.91/  20.91 GFLOPS | Progress: (12/20) | 6.45 s
    [Task  2/25]  Current/Best:   11.92/  20.91 GFLOPS | Progress: (16/20) | 7.72 s
    [Task  2/25]  Current/Best:   19.51/  20.91 GFLOPS | Progress: (20/20) | 9.34 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.54 GFLOPS | Progress: (4/20) | 5.84 s
    [Task  3/25]  Current/Best:   15.48/  16.88 GFLOPS | Progress: (8/20) | 7.79 s
    [Task  3/25]  Current/Best:   14.84/  16.88 GFLOPS | Progress: (12/20) | 9.56 s
    [Task  3/25]  Current/Best:    7.17/  23.74 GFLOPS | Progress: (16/20) | 11.50 s
    [Task  3/25]  Current/Best:   12.53/  23.74 GFLOPS | Progress: (20/20) | 16.05 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.50/  20.49 GFLOPS | Progress: (4/20) | 2.38 s
    [Task  4/25]  Current/Best:    6.61/  20.49 GFLOPS | Progress: (8/20) | 6.80 s
    [Task  4/25]  Current/Best:   21.46/  21.46 GFLOPS | Progress: (12/20) | 11.40 s
    [Task  4/25]  Current/Best:   16.83/  21.46 GFLOPS | Progress: (16/20) | 13.65 s
    [Task  4/25]  Current/Best:   13.10/  21.46 GFLOPS | Progress: (20/20) | 15.71 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.49/  10.16 GFLOPS | Progress: (4/20) | 2.57 s
    [Task  5/25]  Current/Best:   11.52/  12.64 GFLOPS | Progress: (8/20) | 4.63 s
    [Task  5/25]  Current/Best:   10.48/  18.08 GFLOPS | Progress: (12/20) | 7.73 s
    [Task  5/25]  Current/Best:   11.53/  22.51 GFLOPS | Progress: (16/20) | 9.15 s
    [Task  5/25]  Current/Best:   11.90/  22.51 GFLOPS | Progress: (20/20) | 11.04 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.15/  20.89 GFLOPS | Progress: (4/20) | 3.98 s
    [Task  6/25]  Current/Best:   18.95/  20.89 GFLOPS | Progress: (8/20) | 5.76 s
    [Task  6/25]  Current/Best:   13.05/  20.89 GFLOPS | Progress: (12/20) | 7.70 s
    [Task  6/25]  Current/Best:   19.85/  20.89 GFLOPS | Progress: (16/20) | 9.96 s
    [Task  6/25]  Current/Best:    3.71/  20.89 GFLOPS | Progress: (20/20) | 12.50 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   10.66/  12.13 GFLOPS | Progress: (4/20) | 3.55 s
    [Task  7/25]  Current/Best:   20.21/  21.14 GFLOPS | Progress: (8/20) | 5.07 s
    [Task  7/25]  Current/Best:   15.89/  21.14 GFLOPS | Progress: (12/20) | 7.00 s
    [Task  7/25]  Current/Best:   12.24/  21.14 GFLOPS | Progress: (16/20) | 9.06 s
    [Task  7/25]  Current/Best:    6.31/  21.57 GFLOPS | Progress: (20/20) | 11.53 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.22/  14.10 GFLOPS | Progress: (4/20) | 2.90 s
    [Task  8/25]  Current/Best:    9.51/  14.10 GFLOPS | Progress: (8/20) | 7.67 s
    [Task  8/25]  Current/Best:   12.74/  14.10 GFLOPS | Progress: (12/20) | 13.85 s
    [Task  8/25]  Current/Best:   18.96/  18.96 GFLOPS | Progress: (16/20) | 15.96 s
    [Task  8/25]  Current/Best:   20.24/  20.24 GFLOPS | Progress: (20/20) | 22.51 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.18/  15.78 GFLOPS | Progress: (4/20) | 11.93 s
    [Task  9/25]  Current/Best:   23.20/  23.20 GFLOPS | Progress: (8/20) | 13.71 s
    [Task  9/25]  Current/Best:    8.25/  23.20 GFLOPS | Progress: (12/20) | 16.09 s
    [Task  9/25]  Current/Best:   17.92/  23.20 GFLOPS | Progress: (16/20) | 18.78 s
    [Task  9/25]  Current/Best:    8.98/  23.20 GFLOPS | Progress: (20/20) | 26.46 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.01/  18.01 GFLOPS | Progress: (4/20) | 2.50 s
    [Task 10/25]  Current/Best:   15.26/  18.01 GFLOPS | Progress: (8/20) | 4.11 s
    [Task 10/25]  Current/Best:   12.48/  18.97 GFLOPS | Progress: (12/20) | 5.64 s
    [Task 10/25]  Current/Best:   18.98/  20.26 GFLOPS | Progress: (16/20) | 6.75 s
    [Task 10/25]  Current/Best:    8.96/  20.26 GFLOPS | Progress: (20/20
 ) | 8.29 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.20/  18.09 GFLOPS | Progress: (4/20) | 3.30 s
    [Task 11/25]  Current/Best:   15.22/  18.09 GFLOPS | Progress: (8/20) | 6.03 s
    [Task 11/25]  Current/Best:   18.17/  18.17 GFLOPS | Progress: (12/20) | 8.09 s
    [Task 11/25]  Current/Best:   11.77/  21.18 GFLOPS | Progress: (16/20) | 10.84 s
    [Task 11/25]  Current/Best:   19.41/  21.54 GFLOPS | Progress: (20/20) | 12.85 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.78/  17.97 GFLOPS | Progress: (4/20) | 5.38 s
    [Task 12/25]  Current/Best:    5.21/  17.97 GFLOPS | Progress: (8/20) | 9.09 s
    [Task 12/25]  Current/Best:   18.88/  18.88 GFLOPS | Progress: (12/20) | 11.10 s
    [Task 12/25]  Current/Best:   14.03/  18.88 GFLOPS | Progress: (16/20) | 13.92 s
    [Task 12/25]  Current/Best:   15.12/  18.88 GFLOPS | Progress: (20/20) | 15.83 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.64/  17.27 GFLOPS | Progress: (4/20) | 3.61 s
    [Task 13/25]  Current/Best:   15.55/  20.87 GFLOPS | Progress: (8/20) | 6.04 s
    [Task 13/25]  Current/Best:   19.49/  21.58 GFLOPS | Progress: (12/20) | 9.00 s
    [Task 13/25]  Current/Best:   12.21/  21.58 GFLOPS | Progress: (16/20) | 12.41 s
    [Task 13/25]  Current/Best:   18.54/  21.58 GFLOPS | Progress: (20/20) | 14.65 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.40/  13.40 GFLOPS | Progress: (4/20) | 3.32 s
    [Task 14/25]  Current/Best:    6.06/  13.40 GFLOPS | Progress: (8/20) | 5.52 s
    [Task 14/25]  Current/Best:   20.70/  20.70 GFLOPS | Progress: (12/20) | 8.06 s
    [Task 14/25]  Current/Best:   16.19/  20.70 GFLOPS | Progress: (16/20) | 9.72 s Done.
+
    [Task 14/25]  Current/Best:   16.91/  20.70 GFLOPS | Progress: (20/20) | 11.46 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.13/  17.65 GFLOPS | Progress: (4/20) | 2.64 s
    [Task 15/25]  Current/Best:   14.48/  18.04 GFLOPS | Progress: (8/20) | 3.95 s
    [Task 15/25]  Current/Best:   10.39/  22.22 GFLOPS | Progress: (12/20) | 6.02 s
    [Task 15/25]  Current/Best:   20.34/  22.22 GFLOPS | Progress: (16/20) | 9.49 s
    [Task 15/25]  Current/Best:    9.62/  22.22 GFLOPS | Progress: (20/20) | 10.50 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.35/  20.35 GFLOPS | Progress: (4/20) | 2.97 s
    [Task 16/25]  Current/Best:    3.03/  20.35 GFLOPS | Progress: (8/20) | 4.59 s
    [Task 16/25]  Current/Best:   19.44/  20.35 GFLOPS | Progress: (12/20) | 5.82 s
    [Task 16/25]  Current/Best:   16.75/  20.35 GFLOPS | Progress: (16/20) |
  7.18 s
    [Task 16/25]  Current/Best:   10.09/  21.08 GFLOPS | Progress: (20/20) | 9.25 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   14.16/  18.80 GFLOPS | Progress: (4/20) | 4.71 s
    [Task 17/25]  Current/Best:   14.41/  23.05 GFLOPS | Progress: (8/20) | 7.59 s
    [Task 17/25]  Current/Best:   17.19/  23.05 GFLOPS | Progress: (12/20) | 9.63 s
    [Task 17/25]  Current/Best:   16.46/  23.05 GFLOPS | Progress: (16/20) | 11.78 s
    [Task 17/25]  Current/Best:   10.01/  23.05 GFLOPS | Progress: (20/20) | 13.93 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.32/  17.73 GFLOPS | Progress: (4/20) | 3.68 s
    [Task 18/25]  Current/Best:   10.53/  19.68 GFLOPS | Progress: (8/20) | 7.10 s
    [Task 18/25]  Current/Best:   19.36/  19.68 GFLOPS | Progress: (12/20) | 9.04 s
    [Task 18/25]  Current/Best:    9.92/  19.68 GFLOPS | Progress: (16/20) | 12.66 s
    [Task 18/25]  Current/Best:   20.66/  20.66 GFLOPS | Progress: (20/20) | 14.18 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    5.85/  20.12 GFLOPS | Progress: (4/20) | 6.29 s
    [Task 19/25]  Current/Best:    2.60/  20.12 GFLOPS | Progress: (8/20) | 9.55 s
    [Task 19/25]  Current/Best:   19.31/  20.95 GFLOPS | Progress: (12/20) | 12.34 s
    [Task 19/25]  Current/Best:   15.24/  21.18 GFLOPS | Progress: (16/20) | 15.13 s
    [Task 19/25]  Current/Best:    2.70/  23.42 GFLOPS | Progress: (20/20) | 17.91 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    8.76/  14.96 GFLOPS | Progress: (4/20) | 3.25 s Done.
      Done.
-
    [Task 20/25]  Current/Best:   10.33/  15.10 GFLOPS | Progress: (8/20) | 6.78 s
    [Task 20/25]  Current/Best:    2.32/  16.71 GFLOPS | Progress: (12/20) | 10.70 s
    [Task 20/25]  Current/Best:   12.53/  16.71 GFLOPS | Progress: (16/20) | 14.50 s
    [Task 20/25]  Current/Best:   13.43/  21.64 GFLOPS | Progress: (20/20) | 16.65 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.38/  17.61 GFLOPS | Progress: (4/20) | 3.26 s
    [Task 21/25]  Current/Best:   14.37/  17.61 GFLOPS | Progress: (8/20) | 4.90 s
    [Task 21/25]  Current/Best:    1.61/  17.61 GFLOPS | Progress: (12/20) | 7.02 s
    [Task 21/25]  Current/Best:   18.08/  18.08 GFLOPS | Progress: (16/20) | 10.56 s
    [Task 21/25]  Current/Best:    4.46/  18.08 GFLOPS | Progress: (20/20) | 18.05 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  16.94 GFLOPS | Progress: (4/20
 ) | 2.66 s
    [Task 22/25]  Current/Best:    9.08/  21.24 GFLOPS | Progress: (8/20) | 4.71 s
    [Task 22/25]  Current/Best:   19.66/  21.24 GFLOPS | Progress: (12/20) | 7.09 s
    [Task 22/25]  Current/Best:   15.15/  21.24 GFLOPS | Progress: (16/20) | 9.23 s
    [Task 22/25]  Current/Best:   14.84/  21.24 GFLOPS | Progress: (20/20) | 10.98 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.25/  20.05 GFLOPS | Progress: (4/20) | 3.23 s
    [Task 23/25]  Current/Best:   15.77/  20.05 GFLOPS | Progress: (8/20) | 6.63 s
    [Task 23/25]  Current/Best:   20.38/  21.19 GFLOPS | Progress: (12/20) | 8.51 s
    [Task 23/25]  Current/Best:    6.11/  21.19 GFLOPS | Progress: (16/20) | 15.70 s
    [Task 23/25]  Current/Best:    7.50/  21.19 GFLOPS | Progress: (20/20) | 19.97 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.62/   8.62 GFLOPS | Progress: (4/20) | 11.76 s
    [Task 24/25]  Current/Best:    1.93/   8.62 GFLOPS | Progress: (8/20) | 22.76 s
    [Task 24/25]  Current/Best:    4.25/   8.62 GFLOPS | Progress: (12/20) | 34.31 s Done.
+
    [Task 20/25]  Current/Best:    9.75/  14.96 GFLOPS | Progress: (8/20) | 6.53 s
    [Task 20/25]  Current/Best:    2.32/  16.70 GFLOPS | Progress: (12/20) | 10.46 s
    [Task 20/25]  Current/Best:   12.57/  16.70 GFLOPS | Progress: (16/20) | 14.18 s
    [Task 20/25]  Current/Best:   13.19/  21.75 GFLOPS | Progress: (20/20) | 16.27 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.39/  17.45 GFLOPS | Progress: (4/20) | 3.21 s
    [Task 21/25]  Current/Best:   14.48/  17.45 GFLOPS | Progress: (8/20) | 4.76 s
    [Task 21/25]  Current/Best:    1.61/  17.45 GFLOPS | Progress: (12/20) | 6.87 s
    [Task 21/25]  Current/Best:   18.28/  18.28 GFLOPS | Progress: (16/20) | 10.32 s
    [Task 21/25]  Current/Best:    4.47/  18.28 GFLOPS | Progress: (20/20) | 17.48 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  16.95 GFLOPS | Progress: (4/20
 ) | 2.66 s
    [Task 22/25]  Current/Best:    8.95/  21.65 GFLOPS | Progress: (8/20) | 4.56 s
    [Task 22/25]  Current/Best:   19.95/  21.65 GFLOPS | Progress: (12/20) | 6.87 s
    [Task 22/25]  Current/Best:   15.15/  21.65 GFLOPS | Progress: (16/20) | 8.93 s
    [Task 22/25]  Current/Best:   14.46/  21.65 GFLOPS | Progress: (20/20) | 10.65 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   16.98/  20.37 GFLOPS | Progress: (4/20) | 3.22 s
    [Task 23/25]  Current/Best:   15.36/  20.37 GFLOPS | Progress: (8/20) | 6.55 s
    [Task 23/25]  Current/Best:   20.92/  21.32 GFLOPS | Progress: (12/20) | 8.36 s
    [Task 23/25]  Current/Best:    6.37/  21.32 GFLOPS | Progress: (16/20) | 15.44 s
    [Task 23/25]  Current/Best:    7.58/  21.32 GFLOPS | Progress: (20/20) | 19.67 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.53/   8.53 GFLOPS | Progress: (4/20) | 11.78 s
    [Task 24/25]  Current/Best:    3.39/   8.53 GFLOPS | Progress: (8/20) | 23.02 s
    [Task 24/25]  Current/Best:    3.90/   8.53 GFLOPS | Progress: (12/20) | 33.77 s Done.
      Done.
-
    [Task 24/25]  Current/Best:    7.30/   8.62 GFLOPS | Progress: (16/20) | 40.12 s
    [Task 24/25]  Current/Best:    3.28/   8.79 GFLOPS | Progress: (20/20) | 46.16 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.54/   2.89 GFLOPS | Progress: (4/20) | 11.57 s
    [Task 25/25]  Current/Best:    5.30/   7.73 GFLOPS | Progress: (8/20) | 22.84 s
    [Task 25/25]  Current/Best:    5.74/   7.73 GFLOPS | Progress: (12/20) | 34.24 s
    [Task 25/25]  Current/Best:    5.67/   8.84 GFLOPS | Progress: (16/20) | 36.09 s
    [Task 25/25]  Current/Best:    2.91/   8.84 GFLOPS | Progress: (20/20) | 46.83 s
+
    [Task 24/25]  Current/Best:    7.07/   8.53 GFLOPS | Progress: (16/20) | 39.37 s
    [Task 24/25]  Current/Best:    3.25/   8.78 GFLOPS | Progress: (20/20) | 45.38 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.85 GFLOPS | Progress: (4/20) | 11.58 s
    [Task 25/25]  Current/Best:    5.54/   7.30 GFLOPS | Progress: (8/20) | 22.86 s
    [Task 25/25]  Current/Best:    5.94/   7.30 GFLOPS | Progress: (12/20) | 34.26 s
    [Task 25/25]  Current/Best:    5.54/   8.82 GFLOPS | Progress: (16/20) | 36.01 s
    [Task 25/25]  Current/Best:    2.82/   8.82 GFLOPS | Progress: (20/20) | 46.71 s
 
 
 The output from this tuning process will look something like this:
@@ -660,8 +660,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 413.6162267100053, 'median': 413.5417612000083, 'std': 0.7392341209357353}
-    unoptimized: {'mean': 499.62329165000483, 'median': 499.4997022500115, 'std': 0.6567413833476213}
+    optimized: {'mean': 415.2628203399854, 'median': 414.3342859999848, 'std': 2.549933554378138}
+    unoptimized: {'mean': 498.73701694999, 'median': 498.59961209997437, 'std': 0.580608214157236}
 
 
 
@@ -681,7 +681,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 10 minutes  27.937 seconds)
+   **Total running time of the script:** ( 10 minutes  17.768 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 73204dff7..ddd929a75 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -235,7 +235,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.289e-07 secs/op
+    1.265e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index c31b850c6..928a2cab0 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -233,7 +233,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0xc407820)), stage(b, placeholder(b, 0x9b460b0)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min= [...]
+    [stage(a, placeholder(a, 0xbf54950)), stage(b, placeholder(b, 0x117e3f10)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index 0bf343bb1..8f19fa933 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,17 +5,17 @@
 
 Computation times
 =================
-**13:15.185** total execution time for **tutorial** files:
+**13:21.459** total execution time for **tutorial** files:
 
-- **10:27.937**: :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)
-- **00:59.258**: :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
-- **00:52.137**: :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``)
-- **00:28.777**: :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)
-- **00:25.327**: :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)
-- **00:00.744**: :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)
-- **00:00.582**: :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)
-- **00:00.210**: :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
-- **00:00.054**: :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)
-- **00:00.054**: :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
-- **00:00.053**: :ref:`sphx_glr_tutorial_install.py` (``install.py``)
-- **00:00.052**: :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)
+- **10:17.768**: :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)
+- **01:07.133**: :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``)
+- **01:01.683**: :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
+- **00:29.117**: :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)
+- **00:23.980**: :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)
+- **00:00.766**: :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)
+- **00:00.603**: :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)
+- **00:00.227**: :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:00.050**: :ref:`sphx_glr_tutorial_install.py` (``install.py``)
+- **00:00.048**: :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)
+- **00:00.046**: :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
+- **00:00.039**: :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 0c5d5e684..c5daa5101 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -253,7 +253,7 @@ helper function to run a profile of the TVM generated code.
  .. code-block:: none
 
     Numpy running time: 0.000008
-    naive: 0.000007
+    naive: 0.000006
 
 
 
@@ -344,7 +344,7 @@ compile and run this new schedule with the parallel operation applied:
 
  .. code-block:: none
 
-    parallel: 0.000007
+    parallel: 0.000006
 
 
 
@@ -447,10 +447,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    7.976720003171067e-06                    1.0
-                   naive    6.7113000000000004e-06    0.8413608597684248
-                parallel    6.965200000000001e-06     0.8731909854214581
-                  vector             2.47466e-05      3.1023528455508322
+                   numpy    8.337970002685325e-06                    1.0
+                   naive              5.8446e-06      0.7009619845259325
+                parallel              6.0472e-06      0.7252604648436538
+                  vector             2.46431e-05       2.955527543522398
 
 
 
@@ -839,7 +839,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.018677
+    Numpy running time: 0.019175
 
 
 
@@ -897,7 +897,7 @@ optimizations.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    none: 3.237292
+    none: 3.416501
 
 
 
@@ -996,7 +996,7 @@ schedule.
 
  .. code-block:: none
 
-    blocking: 0.330897
+    blocking: 0.328160
 
 
 
@@ -1088,7 +1088,7 @@ already cache friendly from our previous optimizations.
 
  .. code-block:: none
 
-    vectorization: 0.349767
+    vectorization: 0.351937
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1160,7 +1160,7 @@ more cache friendly.
 
  .. code-block:: none
 
-    loop permutation: 0.119976
+    loop permutation: 0.122959
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1257,7 +1257,7 @@ optimized schedule.
 
  .. code-block:: none
 
-    array packing: 0.107945
+    array packing: 0.110771
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1348,7 +1348,7 @@ to `C` when all the block results are ready.
 
  .. code-block:: none
 
-    block caching: 0.110603
+    block caching: 0.111348
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1432,7 +1432,7 @@ of thread-level parallelization.
 
  .. code-block:: none
 
-    parallelization: 0.144977
+    parallelization: 0.145330
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1511,13 +1511,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none            3.2372921475                     1.0
-                blocking            0.3308970816     0.10221415507881652
-           vectorization            0.3497665616     0.10804294010662813
-        loop permutation     0.11997613389999999     0.03706064464792021
-           array packing     0.10794521769999998     0.03334429294043194
-           block caching            0.1106034309    0.034165415372045906
-         parallelization     0.14497725849999998     0.04478349555567876
+                    none      3.4165014756999996                     1.0
+                blocking            0.3281601215     0.09605150878290312
+           vectorization            0.3519371577     0.10301097780965904
+        loop permutation            0.1229586824     0.03598964709207604
+           array packing            0.1107714357     0.03242247559026863
+           block caching            0.1113480721    0.032591255379799344
+         parallelization            0.1453304095     0.04253778625113094
 
 
 
@@ -1552,6 +1552,11 @@ operations with tunable parameters that allows you to automatically optimize
 the computation for specific platforms.
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 1 minutes  1.683 seconds)
+
+
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
 
 
diff --git a/docs/commit_hash b/docs/commit_hash
index 9ec96ba5a..8d1cc8069 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-12440895e4baad1de494f0a3876edee3e1df06ee
+81702192b49ddb37ce3e179eec3e88f3726acec1
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index a1d9bbc15..83859ce84 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -401,7 +401,7 @@
 </div>
 <img alt="../../_images/sphx_glr_from_mxnet_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_from_mxnet_001.png" />
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip739d518d-7aa3-4b94-975e-12f5615fe057 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip64a722c4-ea57-41fc-8352-ca338bb20723 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 26df924fd..3688df325 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -406,48 +406,41 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
-  0%|          | 16.0k/41.5M [00:00&lt;07:35, 95.5kB/s]
-  0%|          | 40.0k/41.5M [00:00&lt;05:53, 123kB/s]
-  0%|          | 96.0k/41.5M [00:00&lt;03:18, 219kB/s]
-  0%|          | 160k/41.5M [00:00&lt;02:33, 282kB/s]
-  1%|          | 328k/41.5M [00:00&lt;01:20, 538kB/s]
-  1%|1         | 568k/41.5M [00:01&lt;00:51, 839kB/s]
-  3%|2         | 1.10M/41.5M [00:01&lt;00:25, 1.65MB/s]
-  5%|5         | 2.19M/41.5M [00:01&lt;00:12, 3.22MB/s]
-  9%|8         | 3.66M/41.5M [00:01&lt;00:07, 5.01MB/s]
- 12%|#2        | 5.13M/41.5M [00:01&lt;00:06, 6.23MB/s]
- 16%|#5        | 6.60M/41.5M [00:01&lt;00:05, 7.03MB/s]
- 18%|#7        | 7.27M/41.5M [00:02&lt;00:05, 6.08MB/s]
- 20%|##        | 8.45M/41.5M [00:02&lt;00:05, 6.42MB/s]
- 23%|##3       | 9.55M/41.5M [00:02&lt;00:05, 6.29MB/s]
- 26%|##5       | 10.7M/41.5M [00:02&lt;00:04, 6.63MB/s]
- 29%|##9       | 12.1M/41.5M [00:02&lt;00:04, 7.32MB/s]
- 33%|###2      | 13.6M/41.5M [00:02&lt;00:03, 7.81MB/s]
- 36%|###6      | 15.1M/41.5M [00:03&lt;00:03, 8.14MB/s]
- 40%|###9      | 16.5M/41.5M [00:03&lt;00:03, 8.38MB/s]
- 43%|####3     | 18.0M/41.5M [00:03&lt;00:02, 9.69MB/s]
- 46%|####5     | 19.0M/41.5M [00:03&lt;00:02, 9.89MB/s]
- 48%|####8     | 20.0M/41.5M [00:03&lt;00:02, 8.81MB/s]
- 50%|#####     | 20.9M/41.5M [00:03&lt;00:02, 8.97MB/s]
- 54%|#####4    | 22.4M/41.5M [00:03&lt;00:01, 10.2MB/s]
- 56%|#####6    | 23.4M/41.5M [00:03&lt;00:02, 9.46MB/s]
- 59%|#####8    | 24.4M/41.5M [00:04&lt;00:02, 8.16MB/s]
- 61%|######1   | 25.4M/41.5M [00:04&lt;00:02, 7.72MB/s]
- 65%|######4   | 26.8M/41.5M [00:04&lt;00:01, 8.12MB/s]
- 67%|######6   | 27.6M/41.5M [00:04&lt;00:02, 6.95MB/s]
- 69%|######8   | 28.4M/41.5M [00:04&lt;00:02, 6.33MB/s]
- 71%|#######1  | 29.5M/41.5M [00:05&lt;00:01, 6.46MB/s]
- 74%|#######3  | 30.6M/41.5M [00:05&lt;00:01, 6.56MB/s]
- 77%|#######6  | 31.8M/41.5M [00:05&lt;00:01, 6.66MB/s]
- 79%|#######9  | 32.9M/41.5M [00:05&lt;00:01, 6.76MB/s]
- 82%|########2 | 34.1M/41.5M [00:05&lt;00:01, 6.85MB/s]
- 85%|########5 | 35.3M/41.5M [00:05&lt;00:00, 6.93MB/s]
- 88%|########7 | 36.5M/41.5M [00:06&lt;00:00, 7.03MB/s]
- 91%|######### | 37.7M/41.5M [00:06&lt;00:00, 7.12MB/s]
- 94%|#########3| 38.9M/41.5M [00:06&lt;00:00, 7.19MB/s]
- 97%|#########6| 40.1M/41.5M [00:06&lt;00:00, 7.27MB/s]
-100%|#########9| 41.3M/41.5M [00:06&lt;00:00, 7.95MB/s]
-100%|##########| 41.5M/41.5M [00:06&lt;00:00, 6.45MB/s]
+  0%|          | 16.0k/41.5M [00:00&lt;08:16, 87.5kB/s]
+  0%|          | 40.0k/41.5M [00:00&lt;06:24, 113kB/s]
+  0%|          | 96.0k/41.5M [00:00&lt;03:36, 201kB/s]
+  0%|          | 160k/41.5M [00:00&lt;02:47, 259kB/s]
+  1%|          | 336k/41.5M [00:00&lt;01:24, 511kB/s]
+  1%|1         | 544k/41.5M [00:01&lt;00:59, 722kB/s]
+  3%|2         | 1.08M/41.5M [00:01&lt;00:28, 1.48MB/s]
+  5%|5         | 2.16M/41.5M [00:01&lt;00:14, 2.92MB/s]
+  9%|8         | 3.64M/41.5M [00:01&lt;00:08, 4.60MB/s]
+ 12%|#2        | 5.12M/41.5M [00:01&lt;00:06, 5.72MB/s]
+ 16%|#5        | 6.60M/41.5M [00:02&lt;00:05, 6.50MB/s]
+ 19%|#9        | 8.09M/41.5M [00:02&lt;00:04, 7.04MB/s]
+ 23%|##3       | 9.56M/41.5M [00:02&lt;00:04, 7.40MB/s]
+ 27%|##6       | 11.0M/41.5M [00:02&lt;00:04, 7.65MB/s]
+ 30%|###       | 12.5M/41.5M [00:02&lt;00:03, 7.84MB/s]
+ 34%|###3      | 14.0M/41.5M [00:03&lt;00:03, 7.96MB/s]
+ 37%|###7      | 15.5M/41.5M [00:03&lt;00:03, 8.04MB/s]
+ 41%|####      | 17.0M/41.5M [00:03&lt;00:03, 8.10MB/s]
+ 44%|####4     | 18.4M/41.5M [00:03&lt;00:02, 8.15MB/s]
+ 48%|####8     | 19.9M/41.5M [00:03&lt;00:02, 8.18MB/s]
+ 52%|#####1    | 21.4M/41.5M [00:03&lt;00:02, 8.19MB/s]
+ 55%|#####5    | 22.9M/41.5M [00:04&lt;00:02, 8.22MB/s]
+ 59%|#####8    | 24.4M/41.5M [00:04&lt;00:02, 8.22MB/s]
+ 62%|######2   | 25.8M/41.5M [00:04&lt;00:01, 8.23MB/s]
+ 66%|######5   | 27.3M/41.5M [00:04&lt;00:01, 8.24MB/s]
+ 69%|######9   | 28.8M/41.5M [00:04&lt;00:01, 8.24MB/s]
+ 73%|#######2  | 30.3M/41.5M [00:05&lt;00:01, 8.23MB/s]
+ 77%|#######6  | 31.8M/41.5M [00:05&lt;00:01, 8.23MB/s]
+ 80%|########  | 33.2M/41.5M [00:05&lt;00:01, 8.25MB/s]
+ 84%|########3 | 34.7M/41.5M [00:05&lt;00:00, 8.24MB/s]
+ 87%|########7 | 36.2M/41.5M [00:05&lt;00:00, 8.25MB/s]
+ 91%|######### | 37.7M/41.5M [00:06&lt;00:00, 8.25MB/s]
+ 94%|#########4| 39.2M/41.5M [00:06&lt;00:00, 8.25MB/s]
+ 98%|#########7| 40.6M/41.5M [00:06&lt;00:00, 8.24MB/s]
+100%|##########| 41.5M/41.5M [00:06&lt;00:00, 6.79MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_paddle.html b/docs/how_to/compile_models/from_paddle.html
index d1841ac1e..e4cbbaa62 100644
--- a/docs/how_to/compile_models/from_paddle.html
+++ b/docs/how_to/compile_models/from_paddle.html
@@ -469,7 +469,7 @@ A quick solution is</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>TVM prediction top-1 id: 282, class name:  282: &#39;tiger cat&#39;,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  18.358 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  6.280 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-paddle-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/16269b77359771348d507395692524cf/from_paddle.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_paddle.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 37a2359ef..07dddf960 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -387,9 +387,9 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 45%|####4     | 19.9M/44.7M [00:00&lt;00:00, 209MB/s]
- 93%|#########3| 41.6M/44.7M [00:00&lt;00:00, 220MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 220MB/s]
+ 34%|###4      | 15.2M/44.7M [00:00&lt;00:00, 159MB/s]
+ 83%|########3 | 37.3M/44.7M [00:00&lt;00:00, 201MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 200MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 0787549e3..b946c009b 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -612,7 +612,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.604 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  8.900 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index b2bd9653b..4db7ecfab 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -300,18 +300,18 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:42.222</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>05:32.395</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <ul class="simple">
-<li><p><strong>01:18.358</strong>: <a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></li>
-<li><p><strong>01:04.604</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
-<li><p><strong>00:59.131</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
-<li><p><strong>00:33.456</strong>: <a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></li>
-<li><p><strong>00:24.167</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
-<li><p><strong>00:24.139</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
-<li><p><strong>00:22.248</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
-<li><p><strong>00:19.924</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
-<li><p><strong>00:13.662</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
-<li><p><strong>00:02.534</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
+<li><p><strong>01:08.900</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
+<li><p><strong>01:06.280</strong>: <a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></li>
+<li><p><strong>00:58.537</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
+<li><p><strong>00:32.878</strong>: <a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></li>
+<li><p><strong>00:24.378</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
+<li><p><strong>00:22.891</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
+<li><p><strong>00:22.038</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
+<li><p><strong>00:19.623</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
+<li><p><strong>00:14.138</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
+<li><p><strong>00:02.733</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index c84f9c847..3648e936f 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -627,7 +627,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.2313      16.2338      16.2924      16.1510       0.0399
+  16.2210      16.2544      16.3771      15.9601       0.1360
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index 40ada9c50..eb7789f78 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -409,14 +409,13 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
- 11%|#1        | 19.0M/170M [00:00&lt;00:00, 200MB/s]
- 25%|##4       | 42.0M/170M [00:00&lt;00:00, 224MB/s]
- 38%|###8      | 65.2M/170M [00:00&lt;00:00, 233MB/s]
- 53%|#####3    | 90.0M/170M [00:00&lt;00:00, 244MB/s]
- 68%|######8   | 116M/170M [00:00&lt;00:00, 254MB/s]
- 83%|########3 | 141M/170M [00:00&lt;00:00, 258MB/s]
- 98%|#########7| 166M/170M [00:00&lt;00:00, 259MB/s]
-100%|##########| 170M/170M [00:00&lt;00:00, 250MB/s]
+ 11%|#1        | 19.5M/170M [00:00&lt;00:00, 205MB/s]
+ 27%|##7       | 46.2M/170M [00:00&lt;00:00, 249MB/s]
+ 43%|####3     | 73.3M/170M [00:00&lt;00:00, 265MB/s]
+ 58%|#####8    | 98.5M/170M [00:00&lt;00:00, 247MB/s]
+ 73%|#######3  | 125M/170M [00:00&lt;00:00, 256MB/s]
+ 89%|########9 | 151M/170M [00:00&lt;00:00, 264MB/s]
+100%|##########| 170M/170M [00:00&lt;00:00, 253MB/s]
 /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=&#39;floor&#39;).
@@ -514,7 +513,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  4.934 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  5.231 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index cf36ce113..c2c2e9791 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -450,7 +450,7 @@ training. Other models require a full post training calibration.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 156MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 153MB/s]
 </pre></div>
 </div>
 </div>
@@ -544,7 +544,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.6083      90.3127      97.9939      90.1749       1.1198
+  90.4766      90.4321      91.2257      90.2658       0.1759
 </pre></div>
 </div>
 <div class="admonition note">
@@ -583,7 +583,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  8.829 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  8.638 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index f9e07bd38..ae75a982d 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -545,7 +545,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  119.0403     118.8458     123.3791     118.2175      0.7745
+  120.9448     120.9608     121.7090     119.9896      0.3720
 </pre></div>
 </div>
 <div class="admonition note">
@@ -573,7 +573,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  58.903 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  54.339 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 0c1d1068f..8e60a60df 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -482,7 +482,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  22.221 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  15.411 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index ee06f7624..05139251a 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -415,24 +415,24 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  4%|4         | 5475/132723 [00:00&lt;00:02, 54745.09KB/s]
- 10%|9         | 13114/132723 [00:00&lt;00:01, 67474.21KB/s]
- 16%|#5        | 20717/132723 [00:00&lt;00:01, 71377.65KB/s]
- 21%|##1       | 28178/132723 [00:00&lt;00:01, 72650.70KB/s]
- 27%|##7       | 36266/132723 [00:00&lt;00:01, 75610.49KB/s]
- 33%|###3      | 44249/132723 [00:00&lt;00:01, 77043.00KB/s]
- 39%|###9      | 52130/132723 [00:00&lt;00:01, 77617.80KB/s]
- 45%|####5     | 60085/132723 [00:00&lt;00:00, 78229.09KB/s]
- 51%|#####1    | 68026/132723 [00:00&lt;00:00, 78593.39KB/s]
- 57%|#####7    | 76040/132723 [00:01&lt;00:00, 79069.09KB/s]
- 63%|######3   | 83947/132723 [00:01&lt;00:00, 78828.58KB/s]
- 69%|######9   | 91831/132723 [00:01&lt;00:00, 78703.97KB/s]
- 75%|#######5  | 99702/132723 [00:01&lt;00:00, 61163.95KB/s]
- 81%|########1 | 107529/132723 [00:01&lt;00:00, 65458.19KB/s]
- 87%|########7 | 115478/132723 [00:01&lt;00:00, 69161.98KB/s]
- 93%|#########3| 123516/132723 [00:01&lt;00:00, 72232.38KB/s]
- 99%|#########9| 131652/132723 [00:01&lt;00:00, 74796.98KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 73288.21KB/s]
+  4%|4         | 5503/132723 [00:00&lt;00:02, 55023.49KB/s]
+ 10%|9         | 13158/132723 [00:00&lt;00:01, 67681.39KB/s]
+ 15%|#5        | 19927/132723 [00:00&lt;00:02, 49903.60KB/s]
+ 21%|##        | 27489/132723 [00:00&lt;00:01, 58253.64KB/s]
+ 25%|##5       | 33751/132723 [00:00&lt;00:01, 53655.50KB/s]
+ 31%|###1      | 41251/132723 [00:00&lt;00:01, 59831.56KB/s]
+ 37%|###6      | 49083/132723 [00:00&lt;00:01, 65238.45KB/s]
+ 43%|####2     | 56965/132723 [00:00&lt;00:01, 69239.68KB/s]
+ 49%|####8     | 64857/132723 [00:01&lt;00:00, 72105.14KB/s]
+ 55%|#####4    | 72739/132723 [00:01&lt;00:00, 74095.80KB/s]
+ 61%|######    | 80634/132723 [00:01&lt;00:00, 75541.82KB/s]
+ 67%|######6   | 88277/132723 [00:01&lt;00:00, 70922.11KB/s]
+ 72%|#######2  | 96063/132723 [00:01&lt;00:00, 72900.58KB/s]
+ 78%|#######8  | 103974/132723 [00:01&lt;00:00, 74695.77KB/s]
+ 84%|########4 | 111854/132723 [00:01&lt;00:00, 75895.29KB/s]
+ 90%|######### | 119783/132723 [00:01&lt;00:00, 76893.64KB/s]
+ 96%|#########6| 127718/132723 [00:01&lt;00:00, 77620.00KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 69911.00KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -477,7 +477,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 </pre></div>
 </div>
 <img alt="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" />
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  22.745 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  23.592 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 3194ce155..351d365af 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -300,16 +300,16 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>10:51.548</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>10:40.235</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <ul class="simple">
-<li><p><strong>03:04.934</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
-<li><p><strong>02:22.745</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
-<li><p><strong>01:58.903</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
-<li><p><strong>01:22.221</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
-<li><p><strong>01:08.829</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
-<li><p><strong>00:30.761</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
-<li><p><strong>00:22.948</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
-<li><p><strong>00:00.207</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
+<li><p><strong>03:05.231</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
+<li><p><strong>02:23.592</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
+<li><p><strong>01:54.339</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
+<li><p><strong>01:15.411</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
+<li><p><strong>01:08.638</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
+<li><p><strong>00:30.284</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
+<li><p><strong>00:22.527</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
+<li><p><strong>00:00.213</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 29908aed8..449bf1494 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -590,7 +590,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipa89ba127-910b-4223-b077-7869f3b71209 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip53471836-d01e-465a-9d91-6da1f00778d7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index c855cc285..1f5ef39ac 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -300,12 +300,12 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:42.181</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:40.820</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:38.303</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
-<li><p><strong>00:02.499</strong>: <a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></li>
-<li><p><strong>00:01.157</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
-<li><p><strong>00:00.222</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
+<li><p><strong>00:37.072</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
+<li><p><strong>00:02.412</strong>: <a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></li>
+<li><p><strong>00:01.120</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
+<li><p><strong>00:00.217</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index b1446407a..351ca963c 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -486,10 +486,10 @@ profile the execution time of each passes.</p>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6670us [6670us] (45.74%; 45.74%)
-FoldScaleAxis: 7911us [7us] (54.26%; 54.26%)
-        FoldConstant: 7904us [1613us] (54.21%; 99.91%)
-                InferType: 6291us [6291us] (43.14%; 79.59%)
+InferType: 6619us [6619us] (45.79%; 45.79%)
+FoldScaleAxis: 7836us [6us] (54.21%; 54.21%)
+        FoldConstant: 7830us [1592us] (54.17%; 99.93%)
+                InferType: 6238us [6238us] (43.16%; 79.67%)
 </pre></div>
 </div>
 </div>
@@ -512,10 +512,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6425us [6425us] (44.75%; 44.75%)
-FoldScaleAxis: 7931us [6us] (55.25%; 55.25%)
-        FoldConstant: 7925us [1635us] (55.20%; 99.92%)
-                InferType: 6290us [6290us] (43.82%; 79.37%)
+InferType: 6322us [6322us] (44.82%; 44.82%)
+FoldScaleAxis: 7784us [5us] (55.18%; 55.18%)
+        FoldConstant: 7779us [1610us] (55.15%; 99.94%)
+                InferType: 6169us [6169us] (43.73%; 79.30%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index 46a37ca20..d8d1f5944 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -534,7 +534,7 @@ latency of convolution.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 34.720307 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 44.037937 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index f0bb06197..e16aaa9ce 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -878,7 +878,7 @@ be able to run on our build server</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 8.761867 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.268366 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index 6f33529b5..a3a9d7a53 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -431,8 +431,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019098
-Baseline: 3.258288
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019966
+Baseline: 3.460861
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -494,7 +494,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.317547
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.320441
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -563,7 +563,7 @@ vastly.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.344534
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.348261
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -626,7 +626,7 @@ the access pattern for A matrix is more cache friendly.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.119541
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.122005
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -711,7 +711,7 @@ flattening.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.111624
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.111262
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -799,7 +799,7 @@ write to C when all the block results are ready.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.112169
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.112845
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -891,7 +891,7 @@ write to C when all the block results are ready.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.145835
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.144978
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index 4f3eb2045..d8f05f246 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -300,11 +300,11 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:35.067</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:36.029</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:32.377</strong>: <a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></li>
-<li><p><strong>00:01.461</strong>: <a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></li>
-<li><p><strong>00:01.229</strong>: <a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></li>
+<li><p><strong>00:33.200</strong>: <a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></li>
+<li><p><strong>00:01.548</strong>: <a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></li>
+<li><p><strong>00:01.281</strong>: <a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index a9d88fde2..5ee293cc6 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -300,14 +300,14 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:31.927</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>05:29.427</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <ul class="simple">
-<li><p><strong>02:47.014</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
-<li><p><strong>01:22.372</strong>: <a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></li>
-<li><p><strong>00:44.176</strong>: <a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></li>
-<li><p><strong>00:19.848</strong>: <a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></li>
-<li><p><strong>00:09.562</strong>: <a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></li>
-<li><p><strong>00:08.955</strong>: <a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></li>
+<li><p><strong>02:48.697</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
+<li><p><strong>01:21.538</strong>: <a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></li>
+<li><p><strong>00:43.895</strong>: <a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></li>
+<li><p><strong>00:17.124</strong>: <a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></li>
+<li><p><strong>00:09.329</strong>: <a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></li>
+<li><p><strong>00:08.845</strong>: <a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index 3ed4bc967..9f1059d4e 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -470,43 +470,484 @@ cooperative fetching, unrolling and operator fusion.</p>
              compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
   preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [84]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [384]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [1], [], scope=&quot;local&quot;, align=4)[0] = 0f32
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 28;
+  allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
     conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[2] = 0f32
     conv2d_nchw_1[3] = 0f32
-    for (rc.outer.outer: int32, 0, 128) {
-      for (rx.outer.outer: int32, 0, 3) {
-        attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-        for (ax0.ax1.fused.ax2.fused.ax3.fused.inner.s: int32, 0, 3) {
-          if @tir.likely((threadIdx.x_1 &lt; 28), dtype=bool) {
-            pad_temp.shared_1: Buffer(pad_temp.shared, float32, [84], [], scope=&quot;shared&quot;)[((threadIdx.x_1*3) + ax0.ax1.fused.ax2.fused.ax3.fused.inner.s)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(((threadIdx.x_1*3) + ax0.ax1.fused.ax2.fused.ax3.fused.inner.s), 21), 7) + floormod(blockIdx.x, 7))) &amp;&amp; ((floordiv(floormod(((threadIdx.x_1*3) + ax0.ax1.fused.ax2.fused.ax3.fused.inner.s), 21), 7) + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer [...]
-          }
-        }
-        for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 7) {
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-          if @tir.likely((((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*7) + floordiv(threadIdx.x_2, 8)) &lt; 48), dtype=bool) {
-            kernel.shared_1: Buffer(kernel.shared, float32, [384], [], scope=&quot;shared&quot;)[((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2)] = kernel[((((((floordiv(blockIdx.x, 7)*147456) + (floordiv(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*14) + floordiv(threadIdx.x_2, 4)), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + threadIdx.x_2), 12), 3)*9)) + (floormod(((ax0.ax1.fused.ax2.fused.ax3.fused. [...]
-          }
-        }
-        for (rc.inner: int32, 0, 4) {
-          for (ry.inner: int32, 0, 3) {
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner) + 96)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner) + 192)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.inner*21) + (ry.inner*7)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*12) + (rc.inner*3)) + ry.inner) + 288)]))
+    conv2d_nchw_1[4] = 0f32
+    conv2d_nchw_1[5] = 0f32
+    conv2d_nchw_1[6] = 0f32
+    conv2d_nchw_1[7] = 0f32
+    conv2d_nchw_1[8] = 0f32
+    conv2d_nchw_1[9] = 0f32
+    conv2d_nchw_1[10] = 0f32
+    conv2d_nchw_1[11] = 0f32
+    conv2d_nchw_1[12] = 0f32
+    conv2d_nchw_1[13] = 0f32
+    for (rc.outer.outer: int32, 0, 64) {
+      for (ry.outer.outer: int32, 0, 3) {
+        let cse_var_2: int32 = (rc.outer.outer*72)
+        let cse_var_1: int32 = (ry.outer.outer*3)
+         {
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope=&quot;shared&quot;)[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) +  [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0 [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0 [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0 [...]
+            }
           }
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 8), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 16), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 32), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 32), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 64), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 40), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 80), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 56), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 112), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 64), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 128), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 80), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 160), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 88), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 176), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 104), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 208), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 112), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 224), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 128), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 256), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 136), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 272), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 152), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 304), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 160), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 320), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 176), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 352), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 184), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 368), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 200), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 400), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 208), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 416), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 224), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 448), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 232), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 464), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 248), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 496), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 256), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 512), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 272), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 544), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 280), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 560), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 296), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 592), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 304), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 608), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 320), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 640), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 328), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 656), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 344), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 688), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 352), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 704), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 368), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 736), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 376), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 752), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
         }
       }
     }
-    compute[((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[0] + bias[((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7))]), 0f32)
-    compute[(((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 392)] = max((conv2d_nchw_1[1] + bias[(((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7)) + 8)]), 0f32)
-    compute[(((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 784)] = max((conv2d_nchw_1[2] + bias[(((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7)) + 16)]), 0f32)
-    compute[(((((floordiv(blockIdx.x, 7)*1568) + (floordiv(threadIdx.x, 7)*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 1176)] = max((conv2d_nchw_1[3] + bias[(((floordiv(blockIdx.x, 7)*32) + floordiv(threadIdx.x, 7)) + 24)]), 0f32)
+    for (i1.inner: int32, 0, 2) {
+      for (i3.inner: int32, 0, 7) {
+        compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
+      }
+    }
   }
 }
 </pre></div>
@@ -543,7 +984,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.389 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.358 ms
 </pre></div>
 </div>
 </div>
@@ -574,35 +1015,35 @@ conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
 conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
-conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=4)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
 conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=4)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=1)
-conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
+conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
 conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
 conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
-compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=4)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
 compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
 compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -622,14 +1063,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=3)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 0)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -647,42 +1088,431 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[4];
-  __shared__ float pad_temp_shared[84];
-  __shared__ float kernel_shared[384];
+extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[14];
+  __shared__ float pad_temp_shared[72];
+  __shared__ float kernel_shared[3072];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
   conv2d_nchw[3] = 0.000000e+00f;
-  for (int rc_outer_outer = 0; rc_outer_outer &lt; 128; ++rc_outer_outer) {
-    for (int rx_outer_outer = 0; rx_outer_outer &lt; 3; ++rx_outer_outer) {
+  conv2d_nchw[4] = 0.000000e+00f;
+  conv2d_nchw[5] = 0.000000e+00f;
+  conv2d_nchw[6] = 0.000000e+00f;
+  conv2d_nchw[7] = 0.000000e+00f;
+  conv2d_nchw[8] = 0.000000e+00f;
+  conv2d_nchw[9] = 0.000000e+00f;
+  conv2d_nchw[10] = 0.000000e+00f;
+  conv2d_nchw[11] = 0.000000e+00f;
+  conv2d_nchw[12] = 0.000000e+00f;
+  conv2d_nchw[13] = 0.000000e+00f;
+  for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
+    for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
       __syncthreads();
-      for (int ax0_ax1_fused_ax2_fused_ax3_fused_inner_s = 0; ax0_ax1_fused_ax2_fused_ax3_fused_inner_s &lt; 3; ++ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) {
-        if (((int)threadIdx.x) &lt; 28) {
-          pad_temp_shared[((((int)threadIdx.x) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s)] = (((((1 &lt;= (((((((int)threadIdx.x) % 7) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) / 7) + (((int)blockIdx.x) % 7))) &amp;&amp; ((((((((int)threadIdx.x) % 7) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) / 7) + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 3) + ax0_ax1_fused_ax2_fused_ax3_fused_inner_s) % 7)))) &amp;&amp; ((rx_o [...]
-        }
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 9))) &amp;&amp; (((((int)threadIdx.x) * 4) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
       }
-      for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer &lt; 7; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) {
-        if (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 7) + (((int)threadIdx.x) &gt;&gt; 3)) &lt; 48) {
-          kernel_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 56) + ((int)threadIdx.x))] = kernel[(((((((((int)blockIdx.x) / 7) * 147456) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 14) + (((int)threadIdx.x) &gt;&gt; 2)) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 8) + ((int)threadIdx.x)) % 12) / 3) * 9)) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 2) + ((int)threadIdx.x)) % 3) * 3)) + rx_outer_outer)];
-        }
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
       }
-      __syncthreads();
-      for (int rc_inner = 0; rc_inner &lt; 4; ++rc_inner) {
-        for (int ry_inner = 0; ry_inner &lt; 3; ++ry_inner) {
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner) + 96)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner) + 192)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_inner * 21) + (ry_inner * 7)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 12) + (rc_inner * 3)) + ry_inner) + 288)]));
-        }
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
+      }
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
       }
+      kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
+      kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
+      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
+      kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
+      kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
+      kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
+      kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+      kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
+      kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
+      kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
+      kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
+      kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
+      kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
+      kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
+      kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
+      kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      __syncthreads();
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+    }
+  }
+  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
+    for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
+      compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
     }
   }
-  compute[(((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[0] + bias[(((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
-  compute[((((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7)) + 392)] = max((conv2d_nchw[1] + bias[((((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7)) + 8)]), 0.000000e+00f);
-  compute[((((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7)) + 784)] = max((conv2d_nchw[2] + bias[((((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7)) + 16)]), 0.000000e+00f);
-  compute[((((((((int)blockIdx.x) / 7) * 1568) + ((((int)threadIdx.x) / 7) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + (((int)threadIdx.x) % 7)) + 1176)] = max((conv2d_nchw[3] + bias[((((((int)blockIdx.x) / 7) * 32) + (((int)threadIdx.x) / 7)) + 24)]), 0.000000e+00f);
 }
 </pre></div>
 </div>
@@ -717,9 +1547,10 @@ In the example below we resume the status and do more 5 trials.</p>
 /usr/local/lib/python3.7/dist-packages/xgboost/training.py:17: UserWarning: Old style callback is deprecated.  See: https://xgboost.readthedocs.io/en/latest/python/callbacks.html
   warnings.warn(f&#39;Old style callback is deprecated.  See: {link}&#39;, UserWarning)
 Get devices for measurement successfully!
+.T
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  47.014 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  48.697 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index 9c43de3b3..47f244ac5 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -878,7 +878,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   9.8004       9.8002       9.8580       9.7429       0.0470
+   9.8616       9.8832       9.8934       9.8081       0.0380
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index ded2e9f65..05be05a55 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -897,7 +897,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  765.6064     766.6715     766.8813     763.2664      1.6568
+  769.3796     770.1538     771.6261     766.3590      2.2188
 </pre></div>
 </div>
 </div>
@@ -919,7 +919,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  22.372 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  21.538 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index 767c0e0af..5d7b0b25d 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -600,14 +600,14 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-  preflattened_buffer_map = {placeholder_8: placeholder_15: Buffer(placeholder_13, int32, [33], []), placeholder_6: placeholder_16: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_5: placeholder_17: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_18: Buffer(placeholder_12, int32, [4916], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
-  for (i0.outer.i1.outer.fused: int32, 0, 64) &quot;parallel&quot; {
-    allocate(compute_4: Pointer(global float32), float32, [1024]), storage_scope = global {
-      for (i.outer.inner: int32, 0, 4) {
+  preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_16: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_6: placeholder_18: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
+  for (i0.outer.i1.outer.fused: int32, 0, 128) &quot;parallel&quot; {
+    allocate(compute_4: Pointer(global float32), float32, [512]), storage_scope = global {
+      for (nb_j.inner: int32, 0, 2) {
         for (i.inner.init: int32, 0, 16) {
-          let cse_var_1: int32 = ((i.outer.inner*256) + (i.inner.init*16))
+          let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
            {
-            compute_5: Buffer(compute_4, float32, [1024], [])[cse_var_1] = 0f32
+            compute_5: Buffer(compute_4, float32, [512], [])[cse_var_1] = 0f32
             compute_5[(cse_var_1 + 1)] = 0f32
             compute_5[(cse_var_1 + 2)] = 0f32
             compute_5[(cse_var_1 + 3)] = 0f32
@@ -625,81 +625,51 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
             compute_5[(cse_var_1 + 15)] = 0f32
           }
         }
-        for (elem_idx: int32, 0, let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+        for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
           for (i.inner: int32, 0, 16) {
-            let cse_var_3: int32 = floormod(i0.outer.i1.outer.fused, 32)
+            let cse_var_21: int32 = (elem_idx*16)
+            let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
+            let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+            let cse_var_18: int32 = (cse_var_20 + 1)
+            let cse_var_17: int32 = (cse_var_20 + 11)
+            let cse_var_16: int32 = (cse_var_20 + 12)
+            let cse_var_15: int32 = (cse_var_20 + 13)
+            let cse_var_14: int32 = (cse_var_20 + 14)
+            let cse_var_13: int32 = (cse_var_20 + 15)
+            let cse_var_12: int32 = (cse_var_20 + 2)
+            let cse_var_11: int32 = (cse_var_20 + 3)
+            let cse_var_10: int32 = (cse_var_20 + 4)
+            let cse_var_9: int32 = (cse_var_20 + 5)
+            let cse_var_8: int32 = (cse_var_20 + 6)
+            let cse_var_7: int32 = (cse_var_20 + 7)
+            let cse_var_6: int32 = (cse_var_20 + 8)
+            let cse_var_5: int32 = (cse_var_20 + 9)
+            let cse_var_4: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.inner*256))
+            let cse_var_3: int32 = (cse_var_20 + 10)
              {
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_4: int32 = ((i.outer.inner*256) + (i.inner*16))
-                compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[((placeholder_3[cse_var_3]*16) + (elem_idx*16))]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_5: int32 = (((i.outer.inner*256) + (i.inner*16)) + 1)
-                compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 1)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_6: int32 = (((i.outer.inner*256) + (i.inner*16)) + 2)
-                compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 2)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_7: int32 = (((i.outer.inner*256) + (i.inner*16)) + 3)
-                compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 3)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_8: int32 = (((i.outer.inner*256) + (i.inner*16)) + 4)
-                compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 4)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_9: int32 = (((i.outer.inner*256) + (i.inner*16)) + 5)
-                compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 5)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_10: int32 = (((i.outer.inner*256) + (i.inner*16)) + 6)
-                compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 6)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_11: int32 = (((i.outer.inner*256) + (i.inner*16)) + 7)
-                compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 7)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_12: int32 = (((i.outer.inner*256) + (i.inner*16)) + 8)
-                compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 8)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_13: int32 = (((i.outer.inner*256) + (i.inner*16)) + 9)
-                compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 9)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_14: int32 = (((i.outer.inner*256) + (i.inner*16)) + 10)
-                compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 10)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_15: int32 = (((i.outer.inner*256) + (i.inner*16)) + 11)
-                compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 11)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_16: int32 = (((i.outer.inner*256) + (i.inner*16)) + 12)
-                compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 12)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_17: int32 = (((i.outer.inner*256) + (i.inner*16)) + 13)
-                compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 13)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_18: int32 = (((i.outer.inner*256) + (i.inner*16)) + 14)
-                compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 14)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
-              if @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_3 + 1)] - placeholder_3[cse_var_3])), dtype=bool) {
-                let cse_var_19: int32 = (((i.outer.inner*256) + (i.inner*16)) + 15)
-                compute_5[cse_var_19] = (compute_5[cse_var_19] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + 15)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*16384) + (i.outer.inner*4096)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
-              }
+              compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_4 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
             }
           }
         }
       }
-      for (i0.inner: int32, 0, 64) {
-        let cse_var_20: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*32768) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
-        compute[ramp(cse_var_20, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_20, 1, 16)]), broadcast(0f32, 16))
+      for (i0.inner: int32, 0, 16) {
+        let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+        compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
       }
     }
   }
@@ -738,7 +708,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 2.128 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.730 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index a74dcff88..708b1653f 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -300,13 +300,13 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:44.499</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:44.710</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:43.564</strong>: <a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></li>
+<li><p><strong>00:43.780</strong>: <a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></li>
 <li><p><strong>00:00.245</strong>: <a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></li>
-<li><p><strong>00:00.231</strong>: <a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></li>
-<li><p><strong>00:00.230</strong>: <a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></li>
-<li><p><strong>00:00.229</strong>: <a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></li>
+<li><p><strong>00:00.229</strong>: <a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></li>
+<li><p><strong>00:00.228</strong>: <a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></li>
+<li><p><strong>00:00.228</strong>: <a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index af3d66311..ff82db794 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -1142,8 +1142,8 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2885496
-No: 6   GFLOPS: 42.74/42.74     result: MeasureResult(costs=(0.005416478736842105,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5318071842193604, timestamp=1654629038.5398705)       [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
-No: 7   GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 6   GFLOPS: 92.57/92.57     result: MeasureResult(costs=(0.0025007746875,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7914223670959473, timestamp=1654630725.560318)     [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
+No: 7   GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -1266,7 +1266,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 16, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6225319
-No: 8   GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 8   GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -1389,7 +1389,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,943546
-No: 9   GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 9   GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -1512,7 +1512,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 16, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2868708
-No: 10  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 10  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
     res = future.result()
   File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
@@ -1530,7 +1530,7 @@ No: 10  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
 TimeoutError
 
         [(&#39;tile_f&#39;, [-1, 32, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4691833
-No: 11  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 11  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -1653,7 +1653,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,1042124
-No: 12  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 12  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -1776,7 +1776,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10013405
-No: 13  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 13  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -1899,7 +1899,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6732082
-No: 14  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 14  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -2022,7 +2022,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 32]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7536735
-No: 15  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 15  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -2145,7 +2145,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 128, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,482121
-No: 16  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 16  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -2268,7 +2268,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2824525
-No: 17  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 17  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -2391,7 +2391,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4559286
-No: 18  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 18  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 571, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 523, in _build_func_common
@@ -2514,7 +2514,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 854, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 32, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9677544
-No: 19  GFLOPS: 0.00/42.74      result: Traceback (most recent call last):
+No: 19  GFLOPS: 0.00/92.57      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 721, in __call__
     yield remote, remote.load_module(os.path.split(build_result.filename)[1])
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 685, in run_through_rpc
@@ -2602,7 +2602,7 @@ tvm._ffi.base.TVMError: Traceback (most recent call last):
   15: _PyEval_EvalFrameDefault
   14: 0x0000000000537c30
   13: _PyObject_FastCallKeywords
-  12: 0x00007f821e27efa2
+  12: 0x00007fe1f76a0fa2
   11: _ctypes_callproc
   10: ffi_call
   9: ffi_call_unix64
@@ -2667,7 +2667,7 @@ Traceback (most recent call last):
   21: _PyFunction_FastCallKeywords
   20: _PyEval_EvalFrameDefault
   19: _PyFunction_FastCall      [(&#39;tile_f&#39;, [-1, 8, 2, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6390073
-No: 20  GFLOPS: 144.44/144.44   result: MeasureResult(costs=(0.0016027322899999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4423887729644775, timestamp=1654629065.150454)       [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
+No: 20  GFLOPS: 144.83/144.83   result: MeasureResult(costs=(0.0015984026900000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4392552375793457, timestamp=1654630752.1502788)      [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -2706,7 +2706,7 @@ and measure running time.</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Best config:
 [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
-Time cost of this operator: 0.001978
+Time cost of this operator: 0.001994
 </pre></div>
 </div>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index f2006572c..7f2ef9c7f 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -556,10 +556,10 @@ the tuned operator.</p>
 ########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs
 ---------                                     ---                                           --------  -------  -----              ------  -------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.7     98.722   (1, 2, 10, 10, 3)  2       1
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.106     0.981    (1, 6, 10, 10)     1       1
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.941     0.297    (1, 1, 10, 10, 3)  1       1
-Total_time                                    -                                             316.747   -        -                  -       -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  314.7     98.713   (1, 2, 10, 10, 3)  2       1
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.172     0.995    (1, 6, 10, 10)     1       1
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.932     0.292    (1, 1, 10, 10, 3)  1       1
+Total_time                                    -                                             318.804   -        -                  -       -
 </pre></div>
 </div>
 </div>
@@ -611,10 +611,10 @@ Total_time                                    -
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs
 ---------                                     ---                                           --------  -------  -----              ------  -------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  79.75     96.818   (1, 6, 10, 10, 1)  2       1
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.702     2.066    (1, 6, 10, 10)     1       1
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.919     1.116    (1, 1, 10, 10, 3)  1       1
-Total_time                                    -                                             82.371    -        -                  -       -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  79.65     96.768   (1, 6, 10, 10, 1)  2       1
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.753     2.13     (1, 6, 10, 10)     1       1
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.907     1.102    (1, 1, 10, 10, 3)  1       1
+Total_time                                    -                                             82.311    -        -                  -       -
 </pre></div>
 </div>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index 3e9ccdd8c..1097d4dbd 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -552,8 +552,8 @@ objects to other stuff? We can display some examples from our datasets using <co
 </div>
 <img alt="../../_images/sphx_glr_micro_train_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_micro_train_001.png" />
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp5yutqdte/images/target contains 8144 images
-/tmp/tmp5yutqdte/images/random contains 5000 images
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp4fxlxmsm/images/target contains 8144 images
+/tmp/tmp4fxlxmsm/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -666,11 +666,11 @@ the time on our validation set).</p>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 55s - loss: 0.2116 - accuracy: 0.9278 - val_loss: 0.1487 - val_accuracy: 0.9509
+328/328 - 55s - loss: 0.2282 - accuracy: 0.9238 - val_loss: 0.1515 - val_accuracy: 0.9588
 Epoch 2/3
-328/328 - 52s - loss: 0.1028 - accuracy: 0.9606 - val_loss: 0.1062 - val_accuracy: 0.9637
+328/328 - 52s - loss: 0.1016 - accuracy: 0.9620 - val_loss: 0.1071 - val_accuracy: 0.9653
 Epoch 3/3
-328/328 - 52s - loss: 0.0638 - accuracy: 0.9754 - val_loss: 0.1429 - val_accuracy: 0.9547
+328/328 - 52s - loss: 0.0633 - accuracy: 0.9756 - val_loss: 0.1160 - val_accuracy: 0.9630
 </pre></div>
 </div>
 </div>
@@ -959,7 +959,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  26.229 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  9.231 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index c571bf530..cd07b6f71 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -300,14 +300,14 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:14.025</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>04:57.098</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <ul class="simple">
-<li><p><strong>04:26.229</strong>: <a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></li>
-<li><p><strong>00:43.453</strong>: <a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></li>
-<li><p><strong>00:03.691</strong>: <a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></li>
-<li><p><strong>00:00.225</strong>: <a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></li>
+<li><p><strong>04:09.231</strong>: <a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></li>
+<li><p><strong>00:43.432</strong>: <a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></li>
+<li><p><strong>00:03.791</strong>: <a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></li>
 <li><p><strong>00:00.215</strong>: <a class="reference internal" href="micro_reference_vm.html#sphx-glr-how-to-work-with-microtvm-micro-reference-vm-py"><span class="std std-ref">microTVM Reference Virtual Machines</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_reference_vm.py</span></code>)</p></li>
-<li><p><strong>00:00.213</strong>: <a class="reference internal" href="micro_tvmc.html#sphx-glr-how-to-work-with-microtvm-micro-tvmc-py"><span class="std std-ref">Executing a Tiny Model with TVMC Micro</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tvmc.py</span></code>)</p></li>
+<li><p><strong>00:00.215</strong>: <a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></li>
+<li><p><strong>00:00.214</strong>: <a class="reference internal" href="micro_tvmc.html#sphx-glr-how-to-work-with-microtvm-micro-tvmc-py"><span class="std std-ref">Executing a Tiny Model with TVMC Micro</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tvmc.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index 484554f1b..4d04ad816 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -300,11 +300,11 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:12.024</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:12.240</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:09.997</strong>: <a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></li>
-<li><p><strong>00:01.779</strong>: <a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></li>
-<li><p><strong>00:00.248</strong>: <a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></li>
+<li><p><strong>00:10.232</strong>: <a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></li>
+<li><p><strong>00:01.776</strong>: <a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></li>
+<li><p><strong>00:00.231</strong>: <a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index 04288b777..e46b124e8 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -300,16 +300,16 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:05.793</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:05.947</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:02.085</strong>: <a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></li>
-<li><p><strong>00:01.152</strong>: <a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></li>
-<li><p><strong>00:00.737</strong>: <a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></li>
-<li><p><strong>00:00.733</strong>: <a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></li>
-<li><p><strong>00:00.324</strong>: <a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></li>
-<li><p><strong>00:00.266</strong>: <a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></li>
-<li><p><strong>00:00.259</strong>: <a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></li>
-<li><p><strong>00:00.238</strong>: <a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></li>
+<li><p><strong>00:02.154</strong>: <a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></li>
+<li><p><strong>00:01.200</strong>: <a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></li>
+<li><p><strong>00:00.761</strong>: <a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></li>
+<li><p><strong>00:00.743</strong>: <a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></li>
+<li><p><strong>00:00.332</strong>: <a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></li>
+<li><p><strong>00:00.262</strong>: <a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></li>
+<li><p><strong>00:00.255</strong>: <a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></li>
+<li><p><strong>00:00.241</strong>: <a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index 6d9f7babf..501f589b4 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -552,7 +552,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C}
   preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpellhfz3e/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpellhfz3e/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmp5wlj63qn/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmp5wlj63qn/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/reference/api/doxygen/apply__history__best_8h.html b/docs/reference/api/doxygen/apply__history__best_8h.html
index 89b4ab76c..1d27432cb 100644
--- a/docs/reference/api/doxygen/apply__history__best_8h.html
+++ b/docs/reference/api/doxygen/apply__history__best_8h.html
@@ -69,12 +69,19 @@ $(function() {
 <div class="title">apply_history_best.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
-<div class="textblock"><code>#include &lt;<a class="el" href="database_8h_source.html">tvm/meta_schedule/database.h</a>&gt;</code><br />
+<div class="textblock"><code>#include &lt;<a class="el" href="ir_2module_8h_source.html">tvm/ir/module.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="database_8h_source.html">tvm/meta_schedule/database.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="reflection_8h_source.html">tvm/node/reflection.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="array_8h_source.html">tvm/runtime/container/array.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="optional_8h_source.html">tvm/runtime/container/optional.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="string_8h_source.html">tvm/runtime/container/string.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="object_8h_source.html">tvm/runtime/object.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="packed__func_8h_source.html">tvm/runtime/packed_func.h</a>&gt;</code><br />
 <code>#include &lt;<a class="el" href="target_8h_source.html">tvm/target/target.h</a>&gt;</code><br />
 </div><div class="textblock"><div class="dynheader">
 Include dependency graph for apply_history_best.h:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="apply__history__best_8h__incl.svg" width="4482" height="1470"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="apply__history__best_8h__incl.svg" width="5136" height="1395"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/apply__history__best_8h__incl.svg b/docs/reference/api/doxygen/apply__history__best_8h__incl.svg
index 3463c59f1..432331458 100644
--- a/docs/reference/api/doxygen/apply__history__best_8h__incl.svg
+++ b/docs/reference/api/doxygen/apply__history__best_8h__incl.svg
@@ -4,1571 +4,1713 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/meta_schedule/apply_history_best.h Pages: 1 -->
-<svg width="3361pt" height="1102pt"
- viewBox="0.00 0.00 3361.00 1102.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1098)">
+<svg width="3852pt" height="1046pt"
+ viewBox="0.00 0.00 3852.04 1046.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1042)">
 <title>include/tvm/meta_schedule/apply_history_best.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1098 3357,-1098 3357,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1042 3848.0432,-1042 3848.0432,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1477,-1063.5 1477,-1093.5 1629,-1093.5 1629,-1063.5 1477,-1063.5"/>
-<text text-anchor="start" x="1485" y="-1081.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1553" y="-1070.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/apply_history_best.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="1606.0432,-1007.5 1606.0432,-1037.5 1758.0432,-1037.5 1758.0432,-1007.5 1606.0432,-1007.5"/>
+<text text-anchor="start" x="1614.0432" y="-1025.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1682.0432" y="-1014.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/apply_history_best.h</text>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
 <title>Node1</title>
-<g id="a_node2"><a xlink:href="database_8h.html" target="_top" xlink:title="tvm/meta_schedule/database.h">
-<polygon fill="#ffffff" stroke="#000000" points="1412,-1007.5 1412,-1026.5 1582,-1026.5 1582,-1007.5 1412,-1007.5"/>
-<text text-anchor="middle" x="1497" y="-1014.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/database.h</text>
+<g id="a_node2"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
+<polygon fill="#ffffff" stroke="#000000" points="1114.5432,-839.5 1114.5432,-858.5 1209.5432,-858.5 1209.5432,-839.5 1114.5432,-839.5"/>
+<text text-anchor="middle" x="1162.0432" y="-846.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M1539.1573,-1063.2977C1531.141,-1054.4941 1521.0238,-1043.3833 1512.71,-1034.2529"/>
-<polygon fill="#191970" stroke="#191970" points="1515.2869,-1031.8845 1505.9663,-1026.8469 1510.1112,-1036.5974 1515.2869,-1031.8845"/>
+<path fill="none" stroke="#191970" d="M1636.8063,-1007.4065C1535.9596,-973.7586 1294.2184,-893.1007 1200.2834,-861.759"/>
+<polygon fill="#191970" stroke="#191970" points="1201.2115,-858.379 1190.6178,-858.534 1198.996,-865.0192 1201.2115,-858.379"/>
 </g>
-<!-- Node58 -->
-<g id="node39" class="node">
-<title>Node58</title>
-<g id="a_node39"><a xlink:href="target_8h.html" target="_top" xlink:title="Compilation target object. ">
-<polygon fill="#ffffff" stroke="#000000" points="1589,-946 1589,-965 1699,-965 1699,-946 1589,-946"/>
-<text text-anchor="middle" x="1644" y="-953" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target.h</text>
+<!-- Node6 -->
+<g id="node7" class="node">
+<title>Node6</title>
+<g id="a_node7"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="3112.5432,-436.5 3112.5432,-455.5 3233.5432,-455.5 3233.5432,-436.5 3112.5432,-436.5"/>
+<text text-anchor="middle" x="3173.0432" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node58 -->
-<g id="edge191" class="edge">
-<title>Node0&#45;&gt;Node58</title>
-<path fill="none" stroke="#191970" d="M1564.1584,-1063.4178C1580.9919,-1040.6648 1612.8187,-997.6461 1630.7872,-973.3591"/>
-<polygon fill="#191970" stroke="#191970" points="1633.8012,-975.17 1636.9351,-965.0493 1628.1738,-971.0066 1633.8012,-975.17"/>
+<!-- Node0&#45;&gt;Node6 -->
+<g id="edge210" class="edge">
+<title>Node0&#45;&gt;Node6</title>
+<path fill="none" stroke="#191970" d="M1758.0962,-1021.8703C2113.3332,-1018.4397 3592.0432,-998.5418 3592.0432,-905 3592.0432,-905 3592.0432,-905 3592.0432,-563.5 3592.0432,-492.1091 3363.6835,-462.0879 3244.1366,-451.2629"/>
+<polygon fill="#191970" stroke="#191970" points="3244.2387,-447.7584 3233.9694,-450.3648 3243.6226,-454.7313 3244.2387,-447.7584"/>
 </g>
-<!-- Node2 -->
-<g id="node3" class="node">
-<title>Node2</title>
-<g id="a_node3"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="tvm/meta_schedule/arg\l_info.h">
-<polygon fill="#ffffff" stroke="#000000" points="1418,-940.5 1418,-970.5 1550,-970.5 1550,-940.5 1418,-940.5"/>
-<text text-anchor="start" x="1426" y="-958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/arg</text>
-<text text-anchor="middle" x="1484" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_info.h</text>
+<!-- Node21 -->
+<g id="node9" class="node">
+<title>Node21</title>
+<g id="a_node9"><a xlink:href="array_8h.html" target="_top" xlink:title="Runtime Array container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1266.0432,-302.5 1266.0432,-332.5 1392.0432,-332.5 1392.0432,-302.5 1266.0432,-302.5"/>
+<text text-anchor="start" x="1274.0432" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1329.0432" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node2 -->
-<g id="edge2" class="edge">
-<title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M1494.9688,-1007.3906C1493.4374,-1000.1463 1491.2648,-989.8679 1489.2566,-980.3676"/>
-<polygon fill="#191970" stroke="#191970" points="1492.6727,-979.6045 1487.1802,-970.5446 1485.824,-981.0523 1492.6727,-979.6045"/>
+<!-- Node0&#45;&gt;Node21 -->
+<g id="edge211" class="edge">
+<title>Node0&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1605.8143,-1019.0736C1341.1505,-1006.6804 478.0432,-961.9025 478.0432,-905 478.0432,-905 478.0432,-905 478.0432,-849 478.0432,-602.9937 690.5567,-619.3363 901.0432,-492 1023.347,-418.011 1182.6774,-362.4994 1268.2868,-335.5754"/>
+<polygon fill="#191970" stroke="#191970" points="1269.3471,-338.9111 1277.8501,-332.5905 1267.2614,-332.229 1269.3471,-338.9111"/>
 </g>
-<!-- Node1&#45;&gt;Node58 -->
-<g id="edge136" class="edge">
-<title>Node1&#45;&gt;Node58</title>
-<path fill="none" stroke="#191970" d="M1519.9688,-1007.3906C1544.6724,-997.0554 1584.137,-980.5447 1611.9027,-968.9284"/>
-<polygon fill="#191970" stroke="#191970" points="1613.3182,-972.1303 1621.1925,-965.0419 1610.6165,-965.6727 1613.3182,-972.1303"/>
+<!-- Node10 -->
+<g id="node18" class="node">
+<title>Node10</title>
+<g id="a_node18"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
+<polygon fill="#ffffff" stroke="#000000" points="3050.5432,-67.5 3050.5432,-86.5 3169.5432,-86.5 3169.5432,-67.5 3050.5432,-67.5"/>
+<text text-anchor="middle" x="3110.0432" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
+</a>
 </g>
-<!-- Node74 -->
-<g id="node49" class="node">
-<title>Node74</title>
-<g id="a_node49"><a xlink:href="trace_8h.html" target="_top" xlink:title="tvm/tir/schedule/trace.h">
-<polygon fill="#ffffff" stroke="#000000" points="66.5,-660.5 66.5,-679.5 199.5,-679.5 199.5,-660.5 66.5,-660.5"/>
-<text text-anchor="middle" x="133" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/schedule/trace.h</text>
+</g>
+<!-- Node0&#45;&gt;Node10 -->
+<g id="edge214" class="edge">
+<title>Node0&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1758.1846,-1022.2214C2130.7793,-1020.3696 3744.0432,-1006.4912 3744.0432,-905 3744.0432,-905 3744.0432,-905 3744.0432,-563.5 3744.0432,-483.7015 3730.0432,-464.2985 3730.0432,-384.5 3730.0432,-384.5 3730.0432,-384.5 3730.0432,-250.5 3730.0432,-213.6453 3717.2551,-201.4709 3688.0432,-179 3614.539,-122.4577 3577.9931,-141.1115 3487.0432,-123 3429.933,-111.6272 3272.1632,-94.0501 3179.8218,-84.2439"/>
+<polygon fill="#191970" stroke="#191970" points="3180.0344,-80.747 3169.7215,-83.1746 3179.2974,-87.7081 3180.0344,-80.747"/>
+</g>
+<!-- Node31 -->
+<g id="node27" class="node">
+<title>Node31</title>
+<g id="a_node27"><a xlink:href="optional_8h.html" target="_top" xlink:title="Runtime Optional container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="598.0432,-235.5 598.0432,-265.5 724.0432,-265.5 724.0432,-235.5 598.0432,-235.5"/>
+<text text-anchor="start" x="606.0432" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="661.0432" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node74 -->
-<g id="edge187" class="edge">
-<title>Node1&#45;&gt;Node74</title>
-<path fill="none" stroke="#191970" d="M1411.9543,-1014.4997C1094.9195,-1004.6274 0,-965.3659 0,-894 0,-894 0,-894 0,-782 0,-732.0478 55.5705,-699.8768 94.8708,-683.371"/>
-<polygon fill="#191970" stroke="#191970" points="96.4966,-686.4897 104.4682,-679.5108 93.8844,-679.9953 96.4966,-686.4897"/>
+<!-- Node0&#45;&gt;Node31 -->
+<g id="edge212" class="edge">
+<title>Node0&#45;&gt;Node31</title>
+<path fill="none" stroke="#191970" d="M1605.6659,-1021.5453C1328.0182,-1017.8224 385.5885,-1002.8821 330.0432,-971 299.8885,-953.6917 288.0432,-939.7691 288.0432,-905 288.0432,-905 288.0432,-905 288.0432,-446 288.0432,-312.1354 478.8422,-269.7502 587.5174,-256.4656"/>
+<polygon fill="#191970" stroke="#191970" points="588.2422,-259.9048 597.7689,-255.2687 587.4304,-252.952 588.2422,-259.9048"/>
 </g>
-<!-- Node3 -->
-<g id="node4" class="node">
-<title>Node3</title>
-<g id="a_node4"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2128.5,-492.5 2128.5,-511.5 2227.5,-511.5 2227.5,-492.5 2128.5,-492.5"/>
-<text text-anchor="middle" x="2178" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
+<!-- Node33 -->
+<g id="node28" class="node">
+<title>Node33</title>
+<g id="a_node28"><a xlink:href="string_8h.html" target="_top" xlink:title="Runtime String container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1266.0432,-235.5 1266.0432,-265.5 1392.0432,-265.5 1392.0432,-235.5 1266.0432,-235.5"/>
+<text text-anchor="start" x="1274.0432" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1329.0432" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
 </a>
 </g>
 </g>
-<!-- Node2&#45;&gt;Node3 -->
-<g id="edge3" class="edge">
-<title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1550.2515,-943.994C1560.2012,-942.501 1570.3561,-941.1085 1580,-940 1727.7126,-923.0214 2813.5422,-956.6903 2915,-848 2955.7627,-804.3315 3014.8481,-837.2076 2844,-716 2696.6322,-611.4504 2615.0595,-692.8407 2448,-624 2418.2932,-611.7586 2354.227,-561.3465 2325,-548 2293.6664,-533.6915 2256.6473,-522.1359 2227.5974,-514.1855"/>
-<polygon fill="#191970" stroke="#191970" points="2228.2421,-510.7348 2217.6768,-511.5251 2226.429,-517.4959 2228.2421,-510.7348"/>
+<!-- Node0&#45;&gt;Node33 -->
+<g id="edge213" class="edge">
+<title>Node0&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M1605.7043,-1019.9318C1403.2524,-1012.9065 855.8791,-992.4593 676.0432,-971 518.8612,-952.2439 326.0432,-1063.2971 326.0432,-905 326.0432,-905 326.0432,-905 326.0432,-793 326.0432,-630.482 716.6974,-361.2161 868.0432,-302 876.9767,-298.5047 1128.1226,-271.6694 1255.8816,-258.184"/>
+<polygon fill="#191970" stroke="#191970" points="1256.4048,-261.6483 1265.9824,-257.1185 1255.6704,-254.687 1256.4048,-261.6483"/>
 </g>
-<!-- Node30 -->
-<g id="node23" class="node">
-<title>Node30</title>
-<g id="a_node23"><a xlink:href="shape__tuple_8h.html" target="_top" xlink:title="Runtime ShapeTuple container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1178,-235.5 1178,-265.5 1304,-265.5 1304,-235.5 1178,-235.5"/>
-<text text-anchor="start" x="1186" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1241" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/shape_tuple.h</text>
+<!-- Node41 -->
+<g id="node33" class="node">
+<title>Node41</title>
+<g id="a_node33"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
+<polygon fill="#ffffff" stroke="#000000" points="2395.0432,-369.5 2395.0432,-399.5 2511.0432,-399.5 2511.0432,-369.5 2395.0432,-369.5"/>
+<text text-anchor="start" x="2403.0432" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
+<text text-anchor="middle" x="2453.0432" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
 </a>
 </g>
 </g>
-<!-- Node2&#45;&gt;Node30 -->
-<g id="edge78" class="edge">
-<title>Node2&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M1417.831,-954.9444C1267.0048,-952.9851 886.3733,-944.1183 571,-904 433.4157,-886.498 266,-976.693 266,-838 266,-838 266,-838 266,-782 266,-618.8882 567.324,-440.5879 925,-302 1004.4552,-271.2137 1102.0654,-258.8312 1167.5956,-253.8508"/>
-<polygon fill="#191970" stroke="#191970" points="1168.107,-257.3231 1177.8296,-253.1134 1167.6039,-250.3412 1168.107,-257.3231"/>
+<!-- Node0&#45;&gt;Node41 -->
+<g id="edge215" class="edge">
+<title>Node0&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M1714.026,-1007.49C1775.3033,-979.5434 1914.1844,-920.0973 2038.0432,-895 2243.0995,-853.4499 2837.3919,-960.5645 2975.0432,-803 2997.0238,-777.8397 3012.9782,-668.4919 2978.0432,-615 2875.1731,-457.4868 2638.0595,-406.9164 2520.978,-391.2344"/>
+<polygon fill="#191970" stroke="#191970" points="2521.4194,-387.7624 2511.053,-389.9504 2520.5212,-394.7046 2521.4194,-387.7624"/>
 </g>
-<!-- Node47 -->
-<g id="node29" class="node">
-<title>Node47</title>
-<g id="a_node29"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
-<polygon fill="#ffffff" stroke="#000000" points="1397.5,-884.5 1397.5,-903.5 1498.5,-903.5 1498.5,-884.5 1397.5,-884.5"/>
-<text text-anchor="middle" x="1448" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/function.h</text>
+<!-- Node58 -->
+<g id="node45" class="node">
+<title>Node58</title>
+<g id="a_node45"><a xlink:href="database_8h.html" target="_top" xlink:title="tvm/meta_schedule/database.h">
+<polygon fill="#ffffff" stroke="#000000" points="2046.0432,-951.5 2046.0432,-970.5 2216.0432,-970.5 2216.0432,-951.5 2046.0432,-951.5"/>
+<text text-anchor="middle" x="2131.0432" y="-958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/database.h</text>
 </a>
 </g>
 </g>
-<!-- Node2&#45;&gt;Node47 -->
-<g id="edge79" class="edge">
-<title>Node2&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M1475.1011,-940.2977C1470.2081,-931.9388 1464.0974,-921.4997 1458.9176,-912.6509"/>
-<polygon fill="#191970" stroke="#191970" points="1461.8365,-910.7089 1453.7641,-903.8469 1455.7953,-914.2452 1461.8365,-910.7089"/>
+<!-- Node0&#45;&gt;Node58 -->
+<g id="edge181" class="edge">
+<title>Node0&#45;&gt;Node58</title>
+<path fill="none" stroke="#191970" d="M1758.21,-1012.0674C1839.7514,-1000.8986 1968.5488,-983.257 2051.403,-971.9084"/>
+<polygon fill="#191970" stroke="#191970" points="2052.0912,-975.3469 2061.5237,-970.5222 2051.1412,-968.4117 2052.0912,-975.3469"/>
 </g>
-<!-- Node4 -->
-<g id="node5" class="node">
-<title>Node4</title>
-<g id="a_node5"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1990.5,-436.5 1990.5,-455.5 2111.5,-455.5 2111.5,-436.5 1990.5,-436.5"/>
-<text text-anchor="middle" x="2051" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
+<!-- Node65 -->
+<g id="node47" class="node">
+<title>Node65</title>
+<g id="a_node47"><a xlink:href="target_8h.html" target="_top" xlink:title="Compilation target object. ">
+<polygon fill="#ffffff" stroke="#000000" points="1627.0432,-895.5 1627.0432,-914.5 1737.0432,-914.5 1737.0432,-895.5 1627.0432,-895.5"/>
+<text text-anchor="middle" x="1682.0432" y="-902.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target.h</text>
 </a>
 </g>
 </g>
-<!-- Node3&#45;&gt;Node4 -->
-<g id="edge4" class="edge">
-<title>Node3&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M2156.1679,-492.3733C2135.6897,-483.3435 2104.8601,-469.7494 2081.8681,-459.6111"/>
-<polygon fill="#191970" stroke="#191970" points="2083.1093,-456.3333 2072.5472,-455.5011 2080.285,-462.7383 2083.1093,-456.3333"/>
+<!-- Node0&#45;&gt;Node65 -->
+<g id="edge216" class="edge">
+<title>Node0&#45;&gt;Node65</title>
+<path fill="none" stroke="#191970" d="M1682.0432,-1007.1389C1682.0432,-985.9692 1682.0432,-947.8174 1682.0432,-924.6112"/>
+<polygon fill="#191970" stroke="#191970" points="1685.5433,-924.5594 1682.0432,-914.5595 1678.5433,-924.5595 1685.5433,-924.5594"/>
 </g>
-<!-- Node5 -->
-<g id="node6" class="node">
-<title>Node5</title>
-<g id="a_node6"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2337.5,-369.5 2337.5,-399.5 2450.5,-399.5 2450.5,-369.5 2337.5,-369.5"/>
-<text text-anchor="start" x="2345.5" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="2394" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+<!-- Node2 -->
+<g id="node3" class="node">
+<title>Node2</title>
+<g id="a_node3"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
+<polygon fill="#ffffff" stroke="#000000" points="1266.0432,-727.5 1266.0432,-746.5 1340.0432,-746.5 1340.0432,-727.5 1266.0432,-727.5"/>
+<text text-anchor="middle" x="1303.0432" y="-734.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/adt.h</text>
 </a>
 </g>
 </g>
-<!-- Node3&#45;&gt;Node5 -->
-<g id="edge69" class="edge">
-<title>Node3&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2227.6688,-497.4816C2282.0293,-491.4541 2364.2407,-478.796 2385,-456 2396.1168,-443.7925 2398.1638,-425.1704 2397.5378,-409.9912"/>
-<polygon fill="#191970" stroke="#191970" points="2400.9991,-409.3717 2396.7011,-399.6878 2394.0221,-409.9384 2400.9991,-409.3717"/>
+<!-- Node1&#45;&gt;Node2 -->
+<g id="edge2" class="edge">
+<title>Node1&#45;&gt;Node2</title>
+<path fill="none" stroke="#191970" d="M1174.0648,-839.4509C1198.3104,-820.1921 1253.2881,-776.5218 1283.0345,-752.8935"/>
+<polygon fill="#191970" stroke="#191970" points="1285.3987,-755.4854 1291.0521,-746.5249 1281.0448,-750.0041 1285.3987,-755.4854"/>
 </g>
-<!-- Node16 -->
-<g id="node10" class="node">
-<title>Node16</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="488.5,-6 488.5,-25 533.5,-25 533.5,-6 488.5,-6"/>
-<text text-anchor="middle" x="511" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
+<!-- Node3 -->
+<g id="node4" class="node">
+<title>Node3</title>
+<g id="a_node4"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
+<polygon fill="#ffffff" stroke="#000000" points="2134.5432,-671.5 2134.5432,-690.5 2213.5432,-690.5 2213.5432,-671.5 2134.5432,-671.5"/>
+<text text-anchor="middle" x="2174.0432" y="-678.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/expr.h</text>
+</a>
 </g>
-<!-- Node3&#45;&gt;Node16 -->
-<g id="edge76" class="edge">
-<title>Node3&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2128.281,-500.2552C1970.6528,-494.13 1468.3177,-470.0497 1059,-400 819.9388,-359.0876 748.1659,-371.961 530,-266 479.5431,-241.4936 430,-245.0934 430,-189 430,-189 430,-189 430,-133 430,-101.7875 436.5721,-92.8939 454,-67 463.3479,-53.1111 477.2339,-40.46 488.9596,-31.1725"/>
-<polygon fill="#191970" stroke="#191970" points="491.1846,-33.8774 497.0202,-25.0345 486.9438,-28.3082 491.1846,-33.8774"/>
+</g>
+<!-- Node1&#45;&gt;Node3 -->
+<g id="edge141" class="edge">
+<title>Node1&#45;&gt;Node3</title>
+<path fill="none" stroke="#191970" d="M1208.1164,-839.4889C1277.0206,-825.528 1411.1174,-799.3739 1526.0432,-783 1679.1055,-761.1926 1722.5092,-786.2834 1872.0432,-747 1893.6049,-741.3357 1896.7017,-733.4441 1918.0432,-727 2005.3427,-700.6398 2030.3199,-707.3027 2120.0432,-691 2121.509,-690.7337 2122.997,-690.4626 2124.499,-690.1884"/>
+<polygon fill="#191970" stroke="#191970" points="2125.3251,-693.5953 2134.5298,-688.3491 2124.0625,-686.7101 2125.3251,-693.5953"/>
+</g>
+<!-- Node1&#45;&gt;Node21 -->
+<g id="edge173" class="edge">
+<title>Node1&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1114.3561,-845.4105C1058.541,-840.1025 971.1517,-827.9646 951.0432,-803 945.4673,-796.0775 948.3017,-791.4555 951.0432,-783 1021.1265,-566.847 1086.909,-526.1423 1251.0432,-369 1263.2544,-357.3091 1278.4444,-346.6045 1292.1665,-338.0197"/>
+<polygon fill="#191970" stroke="#191970" points="1294.1986,-340.8803 1300.9258,-332.6952 1290.5626,-334.8987 1294.1986,-340.8803"/>
 </g>
 <!-- Node18 -->
-<g id="node11" class="node">
+<g id="node12" class="node">
 <title>Node18</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2115.5,-179.5 2115.5,-198.5 2162.5,-198.5 2162.5,-179.5 2115.5,-179.5"/>
-<text text-anchor="middle" x="2139" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="545.5432,-6 545.5432,-25 590.5432,-25 590.5432,-6 545.5432,-6"/>
+<text text-anchor="middle" x="568.0432" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
 </g>
-<!-- Node3&#45;&gt;Node18 -->
-<g id="edge77" class="edge">
-<title>Node3&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2174.4118,-492.2755C2171.1641,-483.0638 2166.5157,-468.7773 2164,-456 2146.0463,-364.8123 2140.8911,-253.4867 2139.4912,-208.8763"/>
-<polygon fill="#191970" stroke="#191970" points="2142.9876,-208.6969 2139.205,-198.8003 2135.9904,-208.8957 2142.9876,-208.6969"/>
+<!-- Node1&#45;&gt;Node18 -->
+<g id="edge179" class="edge">
+<title>Node1&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1114.52,-848.2366C993.0858,-845.8165 673.955,-836.3761 574.0432,-803 230.4663,-688.2261 38.0432,-551.2405 38.0432,-189 38.0432,-189 38.0432,-189 38.0432,-133 38.0432,-90.8832 67.9513,-84.9676 106.0432,-67 182.4871,-30.9422 443.119,-19.4128 535.3629,-16.4123"/>
+<polygon fill="#191970" stroke="#191970" points="535.6203,-19.9061 545.5059,-16.095 535.4014,-12.9095 535.6203,-19.9061"/>
 </g>
-<!-- Node23 -->
-<g id="node15" class="node">
-<title>Node23</title>
-<g id="a_node15"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="992.5,-123.5 992.5,-142.5 1121.5,-142.5 1121.5,-123.5 992.5,-123.5"/>
-<text text-anchor="middle" x="1057" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
-</a>
+<!-- Node20 -->
+<g id="node13" class="node">
+<title>Node20</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1751.5432,-179.5 1751.5432,-198.5 1798.5432,-198.5 1798.5432,-179.5 1751.5432,-179.5"/>
+<text text-anchor="middle" x="1775.0432" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
 </g>
+<!-- Node1&#45;&gt;Node20 -->
+<g id="edge180" class="edge">
+<title>Node1&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1164.6525,-839.3993C1168.3259,-826.3701 1175.533,-802.4797 1184.0432,-783 1288.5423,-543.805 1336.0371,-486.1386 1521.0432,-302 1553.9144,-269.283 1559.8597,-256.3268 1601.0432,-235 1625.3819,-222.3963 1698.0777,-205.4642 1741.4227,-196.0552"/>
+<polygon fill="#191970" stroke="#191970" points="1742.201,-199.4679 1751.2415,-193.9434 1740.7291,-192.6244 1742.201,-199.4679"/>
 </g>
-<!-- Node3&#45;&gt;Node23 -->
-<g id="edge72" class="edge">
-<title>Node3&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2128.5806,-492.4537C2081.9591,-483.368 2010.1458,-469.1667 1948,-456 1569.7298,-375.8568 1399.9523,-508.7736 1099,-266 1064.7258,-238.3515 1057.9522,-182.8667 1056.9035,-153.0476"/>
-<polygon fill="#191970" stroke="#191970" points="1060.3969,-152.5975 1056.7376,-142.6547 1053.3978,-152.7093 1060.3969,-152.5975"/>
+<!-- Node16 -->
+<g id="node20" class="node">
+<title>Node16</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1892.0432,-6 1892.0432,-25 1936.0432,-25 1936.0432,-6 1892.0432,-6"/>
+<text text-anchor="middle" x="1914.0432" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
 </g>
-<!-- Node8 -->
-<g id="node16" class="node">
-<title>Node8</title>
-<g id="a_node16"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1874.5,-67.5 1874.5,-86.5 1993.5,-86.5 1993.5,-67.5 1874.5,-67.5"/>
-<text text-anchor="middle" x="1934" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
-</a>
+<!-- Node1&#45;&gt;Node16 -->
+<g id="edge176" class="edge">
+<title>Node1&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1114.2081,-848.3139C1012.4115,-846.1901 777.8513,-837.5789 707.0432,-803 574.7078,-738.3744 478.0432,-710.7723 478.0432,-563.5 478.0432,-563.5 478.0432,-563.5 478.0432,-446 478.0432,-411.2167 473.4057,-402.4996 464.0432,-369 440.0203,-283.0446 341.2299,-246.993 399.0432,-179 431.2755,-141.0924 571.0905,-151.9181 620.0432,-143 788.2018,-112.3653 827.5047,-88.7365 997.0432,-67 1173.7285,-44.3473 1740.507,-22.0068 1881.8691,-16.6888"/>
+<polygon fill="#191970" stroke="#191970" points="1882.1748,-20.1799 1892.0368,-16.3081 1881.9128,-13.1848 1882.1748,-20.1799"/>
 </g>
+<!-- Node1&#45;&gt;Node33 -->
+<g id="edge175" class="edge">
+<title>Node1&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M1114.5003,-846.3089C1004.9428,-838.8095 744.0432,-812.9745 744.0432,-737 744.0432,-737 744.0432,-737 744.0432,-563.5 744.0432,-374.2699 899.2792,-357.968 1080.0432,-302 1138.1351,-284.0136 1205.8102,-270.4719 1255.9327,-261.7996"/>
+<polygon fill="#191970" stroke="#191970" points="1256.7641,-265.2084 1266.0332,-260.0767 1255.5871,-258.3081 1256.7641,-265.2084"/>
 </g>
-<!-- Node3&#45;&gt;Node8 -->
-<g id="edge73" class="edge">
-<title>Node3&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M2128.0978,-497.2618C2076.1551,-491.1762 1999.9097,-478.5996 1982,-456 1935.675,-397.544 2062.9312,-186.6197 2024,-123 2014.6591,-107.7354 1998.4772,-97.3326 1982.3874,-90.3404"/>
-<polygon fill="#191970" stroke="#191970" points="1983.5629,-87.0414 1972.977,-86.6067 1980.9813,-93.548 1983.5629,-87.0414"/>
+<!-- Node36 -->
+<g id="node31" class="node">
+<title>Node36</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="408.5432,-179.5 408.5432,-198.5 501.5432,-198.5 501.5432,-179.5 408.5432,-179.5"/>
+<text text-anchor="middle" x="455.0432" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_map</text>
 </g>
-<!-- Node9 -->
-<g id="node17" class="node">
-<title>Node9</title>
-<g id="a_node17"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1733.5,-.5 1733.5,-30.5 1862.5,-30.5 1862.5,-.5 1733.5,-.5"/>
-<text text-anchor="start" x="1741.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
-<text text-anchor="middle" x="1798" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
-</a>
+<!-- Node1&#45;&gt;Node36 -->
+<g id="edge177" class="edge">
+<title>Node1&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M1114.4297,-848.2947C998.9393,-846.0576 706.7578,-837.0974 616.0432,-803 576.769,-788.2377 569.1298,-777.2385 540.0432,-747 407.4057,-609.1098 435.2522,-523.8715 422.0432,-333 419.0348,-289.528 411.6951,-276.4812 425.0432,-235 428.3033,-224.8688 434.4242,-214.8269 440.2912,-206.7227"/>
+<polygon fill="#191970" stroke="#191970" points="443.082,-208.835 446.4105,-198.7766 437.536,-204.564 443.082,-208.835"/>
 </g>
+<!-- Node42 -->
+<g id="node34" class="node">
+<title>Node42</title>
+<g id="a_node34"><a xlink:href="map_8h.html" target="_top" xlink:title="Runtime Map container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="560.0432,-302.5 560.0432,-332.5 686.0432,-332.5 686.0432,-302.5 560.0432,-302.5"/>
+<text text-anchor="start" x="568.0432" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="623.0432" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/map.h</text>
+</a>
 </g>
-<!-- Node3&#45;&gt;Node9 -->
-<g id="edge71" class="edge">
-<title>Node3&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M2179.0778,-492.2531C2181.6038,-470.1738 2188.4247,-414.7076 2198,-369 2209.1856,-315.6059 2230,-305.0532 2230,-250.5 2230,-250.5 2230,-250.5 2230,-133 2230,-114.696 1996.0322,-59.6958 1872.4642,-31.9244"/>
-<polygon fill="#191970" stroke="#191970" points="1873.1983,-28.5022 1862.6748,-29.7293 1871.6667,-35.3325 1873.1983,-28.5022"/>
 </g>
-<!-- Node14 -->
-<g id="node18" class="node">
-<title>Node14</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2970,-6 2970,-25 3014,-25 3014,-6 2970,-6"/>
-<text text-anchor="middle" x="2992" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
+<!-- Node1&#45;&gt;Node42 -->
+<g id="edge174" class="edge">
+<title>Node1&#45;&gt;Node42</title>
+<path fill="none" stroke="#191970" d="M1114.3629,-846.9582C1008.0518,-841.9786 758.6676,-827.7695 730.0432,-803 590.5809,-682.3193 609.3856,-424.8545 619.4147,-343.0249"/>
+<polygon fill="#191970" stroke="#191970" points="622.9145,-343.2508 620.7258,-332.8845 615.9723,-342.3531 622.9145,-343.2508"/>
 </g>
-<!-- Node3&#45;&gt;Node14 -->
-<g id="edge74" class="edge">
-<title>Node3&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2227.5097,-497.6405C2372.0014,-483.566 2783,-433.3498 2783,-317.5 2783,-317.5 2783,-317.5 2783,-133 2783,-101.7875 2784.0934,-88.2017 2807,-67 2829.2352,-46.4197 2913.1497,-29.103 2959.9539,-20.7896"/>
-<polygon fill="#191970" stroke="#191970" points="2960.6416,-24.2226 2969.8937,-19.0604 2959.4418,-17.3262 2960.6416,-24.2226"/>
+<!-- Node49 -->
+<g id="node38" class="node">
+<title>Node49</title>
+<g id="a_node38"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
+<polygon fill="#ffffff" stroke="#000000" points="2244.0432,-615.5 2244.0432,-634.5 2324.0432,-634.5 2324.0432,-615.5 2244.0432,-615.5"/>
+<text text-anchor="middle" x="2284.0432" y="-622.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/type.h</text>
+</a>
 </g>
-<!-- Node15 -->
-<g id="node19" class="node">
-<title>Node15</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1012.5,-6 1012.5,-25 1081.5,-25 1081.5,-6 1012.5,-6"/>
-<text text-anchor="middle" x="1047" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
 </g>
-<!-- Node3&#45;&gt;Node15 -->
-<g id="edge75" class="edge">
-<title>Node3&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M2128.434,-498.7266C1912.2321,-483.9612 1059.0202,-420.4585 981,-333 902.2913,-244.7697 926.4052,-169.6952 985,-67 993.5079,-52.0889 1008.101,-39.6415 1020.9927,-30.7212"/>
-<polygon fill="#191970" stroke="#191970" points="1023.0985,-33.5261 1029.5502,-25.1222 1019.2659,-27.6685 1023.0985,-33.5261"/>
+<!-- Node1&#45;&gt;Node49 -->
+<g id="edge160" class="edge">
+<title>Node1&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M1209.7173,-843.6772C1272.1428,-836.3485 1384.8357,-821.8842 1480.0432,-803 1515.57,-795.9534 1523.379,-789.314 1559.0432,-783 1852.4114,-731.0617 1961.6731,-833.997 2223.0432,-691 2243.6738,-679.7129 2261.156,-658.8393 2272.0459,-643.5795"/>
+<polygon fill="#191970" stroke="#191970" points="2275.1966,-645.172 2277.9438,-634.9395 2269.4151,-641.2254 2275.1966,-645.172"/>
 </g>
-<!-- Node27 -->
-<g id="node21" class="node">
-<title>Node27</title>
-<g id="a_node21"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="2206.5,-369.5 2206.5,-399.5 2319.5,-399.5 2319.5,-369.5 2206.5,-369.5"/>
-<text text-anchor="start" x="2214.5" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="2263" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
+<!-- Node51 -->
+<g id="node40" class="node">
+<title>Node51</title>
+<g id="a_node40"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="960.0432,-783.5 960.0432,-802.5 1058.0432,-802.5 1058.0432,-783.5 960.0432,-783.5"/>
+<text text-anchor="middle" x="1009.0432" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node3&#45;&gt;Node27 -->
-<g id="edge70" class="edge">
-<title>Node3&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M2184.9559,-492.3845C2198.2075,-474.0661 2227.4139,-433.6925 2246.0989,-407.8633"/>
-<polygon fill="#191970" stroke="#191970" points="2248.9685,-409.868 2251.9939,-399.7143 2243.2969,-405.7651 2248.9685,-409.868"/>
+<!-- Node1&#45;&gt;Node51 -->
+<g id="edge142" class="edge">
+<title>Node1&#45;&gt;Node51</title>
+<path fill="none" stroke="#191970" d="M1135.7416,-839.3733C1110.6304,-830.1822 1072.5994,-816.2624 1044.756,-806.0713"/>
+<polygon fill="#191970" stroke="#191970" points="1045.5954,-802.6516 1035.0017,-802.5011 1043.1894,-809.2251 1045.5954,-802.6516"/>
 </g>
-<!-- Node45 -->
-<g id="node28" class="node">
-<title>Node45</title>
-<g id="a_node28"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2244.5,-436.5 2244.5,-455.5 2375.5,-455.5 2375.5,-436.5 2244.5,-436.5"/>
-<text text-anchor="middle" x="2310" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/repr_printer.h</text>
+<!-- Node54 -->
+<g id="node42" class="node">
+<title>Node54</title>
+<g id="a_node42"><a xlink:href="source__map_8h.html" target="_top" xlink:title="A map from source names to source code. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1572.5432,-615.5 1572.5432,-634.5 1713.5432,-634.5 1713.5432,-615.5 1572.5432,-615.5"/>
+<text text-anchor="middle" x="1643.0432" y="-622.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/parser/source_map.h</text>
 </a>
 </g>
 </g>
-<!-- Node3&#45;&gt;Node45 -->
-<g id="edge68" class="edge">
-<title>Node3&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2200.6916,-492.3733C2222.0711,-483.3032 2254.3057,-469.6279 2278.2362,-459.4755"/>
-<polygon fill="#191970" stroke="#191970" points="2279.7656,-462.6287 2287.6045,-455.5011 2277.0317,-456.1846 2279.7656,-462.6287"/>
+<!-- Node1&#45;&gt;Node54 -->
+<g id="edge161" class="edge">
+<title>Node1&#45;&gt;Node54</title>
+<path fill="none" stroke="#191970" d="M1166.7261,-839.3399C1178.4799,-816.1502 1211.5732,-757.0358 1257.0432,-727 1350.4549,-665.2957 1479.6447,-640.8503 1562.2845,-631.211"/>
+<polygon fill="#191970" stroke="#191970" points="1562.8279,-634.6721 1572.3761,-630.0805 1562.0485,-627.7156 1562.8279,-634.6721"/>
 </g>
-<!-- Node4&#45;&gt;Node5 -->
-<g id="edge5" class="edge">
-<title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2111.6897,-437.2114C2165.8426,-429.0852 2247.5144,-416.1336 2327.1296,-400.3068"/>
-<polygon fill="#191970" stroke="#191970" points="2328.2151,-403.6589 2337.3314,-398.2604 2326.8383,-396.7956 2328.2151,-403.6589"/>
+<!-- Node57 -->
+<g id="node44" class="node">
+<title>Node57</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1568.0432,-783.5 1568.0432,-802.5 1654.0432,-802.5 1654.0432,-783.5 1568.0432,-783.5"/>
+<text text-anchor="middle" x="1611.0432" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_set</text>
 </g>
-<!-- Node4&#45;&gt;Node18 -->
-<g id="edge67" class="edge">
-<title>Node4&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2054.6165,-436.3637C2061.8942,-416.9007 2078.7446,-371.4762 2092,-333 2107.2752,-288.6609 2124.0756,-236.2194 2132.8641,-208.479"/>
-<polygon fill="#191970" stroke="#191970" points="2136.2423,-209.4043 2135.9182,-198.8144 2129.5676,-207.295 2136.2423,-209.4043"/>
+<!-- Node1&#45;&gt;Node57 -->
+<g id="edge178" class="edge">
+<title>Node1&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M1209.8286,-844.2776C1283.7717,-836.7707 1430.3063,-821.0992 1554.0432,-803 1555.3014,-802.816 1556.5745,-802.6257 1557.858,-802.4302"/>
+<polygon fill="#191970" stroke="#191970" points="1558.4973,-805.8726 1567.8249,-800.8481 1557.3998,-798.9592 1558.4973,-805.8726"/>
 </g>
-<!-- Node4&#45;&gt;Node23 -->
-<g id="edge62" class="edge">
-<title>Node4&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1990.3473,-438.799C1923.3986,-430.2809 1820.2952,-415.3138 1784,-400 1781.8653,-399.0993 1651.1773,-302.7923 1649,-302 1541.6085,-262.9222 1233.281,-325.97 1136,-266 1093.8526,-240.0177 1071.4393,-182.4813 1062.1955,-152.3133"/>
-<polygon fill="#191970" stroke="#191970" points="1065.5118,-151.1818 1059.3736,-142.5462 1058.7869,-153.1248 1065.5118,-151.1818"/>
+<!-- Node2&#45;&gt;Node3 -->
+<g id="edge3" class="edge">
+<title>Node2&#45;&gt;Node3</title>
+<path fill="none" stroke="#191970" d="M1340.1904,-734.6117C1478.82,-725.6986 1966.0593,-694.3721 2124.1256,-684.2094"/>
+<polygon fill="#191970" stroke="#191970" points="2124.7369,-687.6774 2134.4917,-683.5429 2124.2878,-680.6918 2124.7369,-687.6774"/>
 </g>
-<!-- Node4&#45;&gt;Node8 -->
-<g id="edge64" class="edge">
-<title>Node4&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M2052.8607,-436.1918C2061.8133,-387.8269 2098.5526,-174.9775 2062,-123 2052.746,-109.8409 2017.0827,-97.6585 1985.7439,-89.1408"/>
-<polygon fill="#191970" stroke="#191970" points="1986.3218,-85.6736 1975.7598,-86.5075 1984.5365,-92.4422 1986.3218,-85.6736"/>
+<!-- Node5 -->
+<g id="node6" class="node">
+<title>Node5</title>
+<g id="a_node6"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="2195.5432,-492.5 2195.5432,-511.5 2294.5432,-511.5 2294.5432,-492.5 2195.5432,-492.5"/>
+<text text-anchor="middle" x="2245.0432" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
+</a>
 </g>
-<!-- Node4&#45;&gt;Node9 -->
-<g id="edge60" class="edge">
-<title>Node4&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M2016.1809,-436.4932C1984.7954,-427.7126 1937.4949,-413.9541 1897,-400 1819.8278,-373.4073 1797.451,-372.4648 1726,-333 1705.6912,-321.7828 1705.7716,-310.0168 1684,-302 1630.1714,-282.1792 1208.1212,-307.9513 1169,-266 1002.0532,-86.9758 1522.9798,-33.4479 1723.1004,-19.7085"/>
-<polygon fill="#191970" stroke="#191970" points="1723.4996,-23.1897 1733.2426,-19.0275 1723.0306,-16.2054 1723.4996,-23.1897"/>
 </g>
-<!-- Node4&#45;&gt;Node14 -->
-<g id="edge65" class="edge">
-<title>Node4&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2111.5857,-443.6127C2209.8246,-439.0076 2399.0301,-426.8778 2460,-400 2552.3724,-359.2789 2631,-351.4498 2631,-250.5 2631,-250.5 2631,-250.5 2631,-133 2631,-64.1229 2871.6224,-29.3924 2959.9686,-18.962"/>
-<polygon fill="#191970" stroke="#191970" points="2960.428,-22.4323 2969.9613,-17.8098 2959.6261,-15.4784 2960.428,-22.4323"/>
+<!-- Node2&#45;&gt;Node5 -->
+<g id="edge132" class="edge">
+<title>Node2&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M1340.067,-728.1216C1413.1503,-710.5663 1581.0993,-670.0757 1722.0432,-635 1897.9615,-591.2205 2106.7367,-537.6389 2198.1969,-514.0843"/>
+<polygon fill="#191970" stroke="#191970" points="2199.1695,-517.448 2207.9802,-511.5639 2197.4232,-510.6694 2199.1695,-517.448"/>
 </g>
-<!-- Node4&#45;&gt;Node15 -->
-<g id="edge66" class="edge">
-<title>Node4&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1990.4085,-439.637C1805.9879,-419.4864 1254.6267,-353.225 1099,-266 1033.4509,-229.2613 1010.7364,-212.8363 983,-143 966.6663,-101.8741 1004.3041,-56.0504 1028.535,-32.1175"/>
-<polygon fill="#191970" stroke="#191970" points="1031.0214,-34.5828 1035.8456,-25.15 1026.192,-29.5155 1031.0214,-34.5828"/>
+<!-- Node2&#45;&gt;Node21 -->
+<g id="edge137" class="edge">
+<title>Node2&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1303.978,-727.3619C1307.8189,-687.2813 1322.4462,-529.8284 1328.0432,-400 1328.8653,-380.931 1329.1025,-359.3616 1329.141,-343.061"/>
+<polygon fill="#191970" stroke="#191970" points="1332.6411,-342.7131 1329.1406,-332.7132 1325.6411,-342.7134 1332.6411,-342.7131"/>
 </g>
-<!-- Node26 -->
-<g id="node20" class="node">
-<title>Node26</title>
-<g id="a_node20"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
-<polygon fill="#ffffff" stroke="#000000" points="2334,-241 2334,-260 2472,-260 2472,-241 2334,-241"/>
-<text text-anchor="middle" x="2403" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
-</a>
+<!-- Node2&#45;&gt;Node10 -->
+<g id="edge139" class="edge">
+<title>Node2&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1340.2998,-735.3491C1465.1499,-729.7717 1880.1862,-710.8643 2223.0432,-691 2507.8755,-674.4975 3261.8913,-733.0415 3502.0432,-579 3655.2614,-480.7208 3555.0381,-294.1306 3414.0432,-179 3377.5523,-149.2029 3367.7205,-140.6566 3324.0432,-123 3277.7275,-104.2768 3222.5476,-92.6528 3179.8209,-85.7741"/>
+<polygon fill="#191970" stroke="#191970" points="3180.216,-82.2935 3169.7959,-84.2101 3179.1369,-89.2098 3180.216,-82.2935"/>
 </g>
+<!-- Node2&#45;&gt;Node16 -->
+<g id="edge140" class="edge">
+<title>Node2&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1286.4,-727.3964C1271.9353,-718.7097 1250.7409,-705.1451 1234.0432,-691 1111.7483,-587.4006 1073.1105,-551.5846 1021.0432,-400 972.5465,-258.8105 936.7229,-143.2909 1065.0432,-67 1136.2451,-24.668 1734.8803,-16.979 1881.6537,-15.7255"/>
+<polygon fill="#191970" stroke="#191970" points="1881.8372,-19.2242 1891.8084,-15.643 1881.7803,-12.2244 1881.8372,-19.2242"/>
 </g>
-<!-- Node4&#45;&gt;Node26 -->
-<g id="edge61" class="edge">
-<title>Node4&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2068.4557,-436.3051C2125.8246,-404.4426 2308.7749,-302.8324 2377.0118,-264.9338"/>
-<polygon fill="#191970" stroke="#191970" points="2378.8112,-267.938 2385.854,-260.0229 2375.4124,-261.8185 2378.8112,-267.938"/>
+<!-- Node2&#45;&gt;Node33 -->
+<g id="edge138" class="edge">
+<title>Node2&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M1298.594,-727.4458C1279.5956,-685.5502 1207.003,-513.7184 1224.0432,-369 1227.6234,-338.595 1221.7522,-326.5504 1240.0432,-302 1250.0239,-288.6039 1264.6,-278.12 1279.0783,-270.2092"/>
+<polygon fill="#191970" stroke="#191970" points="1280.7382,-273.2915 1288.0385,-265.6132 1277.5434,-267.063 1280.7382,-273.2915"/>
 </g>
-<!-- Node4&#45;&gt;Node27 -->
-<g id="edge32" class="edge">
-<title>Node4&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M2084.125,-436.3906C2115.3404,-427.3352 2162.898,-413.539 2201.131,-402.4479"/>
-<polygon fill="#191970" stroke="#191970" points="2202.5101,-405.6922 2211.139,-399.5446 2200.5598,-398.9693 2202.5101,-405.6922"/>
+<!-- Node2&#45;&gt;Node49 -->
+<g id="edge131" class="edge">
+<title>Node2&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M1340.3017,-732.7462C1491.6402,-715.468 2060.5075,-650.5209 2233.6968,-630.748"/>
+<polygon fill="#191970" stroke="#191970" points="2234.2701,-634.2054 2243.8085,-629.5936 2233.476,-627.2506 2234.2701,-634.2054"/>
 </g>
-<!-- Node28 -->
-<g id="node22" class="node">
-<title>Node28</title>
-<g id="a_node22"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1734.5,-308 1734.5,-327 1859.5,-327 1859.5,-308 1734.5,-308"/>
-<text text-anchor="middle" x="1797" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
+<!-- Node50 -->
+<g id="node39" class="node">
+<title>Node50</title>
+<g id="a_node39"><a xlink:href="runtime_2container_2adt_8h.html" target="_top" xlink:title="Runtime ADT container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1046.0432,-235.5 1046.0432,-265.5 1172.0432,-265.5 1172.0432,-235.5 1046.0432,-235.5"/>
+<text text-anchor="start" x="1054.0432" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1109.0432" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/adt.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node28 -->
-<g id="edge63" class="edge">
-<title>Node4&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2031.9087,-436.3416C1987.4863,-413.8681 1877.0276,-357.9864 1824.9766,-331.6535"/>
-<polygon fill="#191970" stroke="#191970" points="1826.4694,-328.4864 1815.9663,-327.0952 1823.3094,-334.7326 1826.4694,-328.4864"/>
+<!-- Node2&#45;&gt;Node50 -->
+<g id="edge133" class="edge">
+<title>Node2&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M1292.6226,-727.36C1256.7018,-693.5827 1136.7039,-576.1115 1072.0432,-456 1038.6264,-393.9259 1001.2056,-364.3824 1034.0432,-302 1040.8516,-289.0659 1052.5379,-278.7533 1064.686,-270.8569"/>
+<polygon fill="#191970" stroke="#191970" points="1066.7808,-273.6815 1073.5501,-265.5313 1063.1757,-267.6812 1066.7808,-273.6815"/>
 </g>
-<!-- Node19 -->
-<g id="node7" class="node">
-<title>Node19</title>
-<g id="a_node7"><a xlink:href="array_8h.html" target="_top" xlink:title="Runtime Array container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1514,-302.5 1514,-332.5 1640,-332.5 1640,-302.5 1514,-302.5"/>
-<text text-anchor="start" x="1522" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1577" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
+<!-- Node4 -->
+<g id="node5" class="node">
+<title>Node4</title>
+<g id="a_node5"><a xlink:href="ir_2span_8h.html" target="_top" xlink:title="Span information for debugging purposes. ">
+<polygon fill="#ffffff" stroke="#000000" points="2197.5432,-554 2197.5432,-573 2278.5432,-573 2278.5432,-554 2197.5432,-554"/>
+<text text-anchor="middle" x="2238.0432" y="-561" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/span.h</text>
 </a>
 </g>
 </g>
-<!-- Node5&#45;&gt;Node19 -->
-<g id="edge6" class="edge">
-<title>Node5&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M2337.393,-370.387C2334.5635,-369.8747 2331.7543,-369.4074 2329,-369 2063.4129,-329.713 1993.5829,-354.8948 1726,-333 1701.2778,-330.9771 1674.2954,-328.3101 1650.1683,-325.7597"/>
-<polygon fill="#191970" stroke="#191970" points="1650.3329,-322.2575 1640.0178,-324.6761 1649.5897,-329.218 1650.3329,-322.2575"/>
+<!-- Node3&#45;&gt;Node4 -->
+<g id="edge4" class="edge">
+<title>Node3&#45;&gt;Node4</title>
+<path fill="none" stroke="#191970" d="M2179.2806,-671.3845C2189.8754,-651.9331 2214.0153,-607.6138 2227.8326,-582.2462"/>
+<polygon fill="#191970" stroke="#191970" points="2231.0238,-583.7044 2232.7335,-573.2484 2224.8765,-580.356 2231.0238,-583.7044"/>
 </g>
-<!-- Node5&#45;&gt;Node14 -->
-<g id="edge31" class="edge">
-<title>Node5&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2438.4201,-369.3685C2486.0752,-349.9817 2555,-311.4131 2555,-250.5 2555,-250.5 2555,-250.5 2555,-133 2555,-92.7538 2581.2381,-85.4619 2617,-67 2676.6042,-36.2296 2879.6195,-21.7846 2959.4145,-17.1954"/>
-<polygon fill="#191970" stroke="#191970" points="2960.0397,-20.666 2969.8284,-16.6126 2959.6484,-13.677 2960.0397,-20.666"/>
+<!-- Node3&#45;&gt;Node5 -->
+<g id="edge124" class="edge">
+<title>Node3&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M2172.7727,-671.2485C2170.2501,-648.2079 2166.6341,-589.2433 2189.0432,-548 2195.8684,-535.4385 2207.667,-524.9657 2218.6795,-517.2022"/>
+<polygon fill="#191970" stroke="#191970" points="2220.8066,-519.9922 2227.2327,-511.5687 2216.9562,-514.1462 2220.8066,-519.9922"/>
 </g>
-<!-- Node5&#45;&gt;Node26 -->
-<g id="edge26" class="edge">
-<title>Node5&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2395.0149,-369.389C2396.648,-345.0735 2399.8591,-297.2644 2401.6722,-270.2698"/>
-<polygon fill="#191970" stroke="#191970" points="2405.1769,-270.3163 2402.3549,-260.1042 2398.1926,-269.8471 2405.1769,-270.3163"/>
+<!-- Node22 -->
+<g id="node10" class="node">
+<title>Node22</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1370.0432,-123.5 1370.0432,-142.5 1434.0432,-142.5 1434.0432,-123.5 1370.0432,-123.5"/>
+<text text-anchor="middle" x="1402.0432" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
 </g>
-<!-- Node20 -->
-<g id="node8" class="node">
-<title>Node20</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1552,-123.5 1552,-142.5 1616,-142.5 1616,-123.5 1552,-123.5"/>
-<text text-anchor="middle" x="1584" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
+<!-- Node3&#45;&gt;Node22 -->
+<g id="edge127" class="edge">
+<title>Node3&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M2134.376,-679.5161C1997.5431,-674.2235 1552.4099,-655.4918 1530.0432,-635 1454.3754,-565.6748 1507.7234,-268.2363 1457.0432,-179 1450.0097,-166.6156 1438.2772,-156.1128 1427.4507,-148.2834"/>
+<polygon fill="#191970" stroke="#191970" points="1429.3088,-145.3145 1419.0695,-142.5927 1425.3766,-151.1057 1429.3088,-145.3145"/>
 </g>
-<!-- Node19&#45;&gt;Node20 -->
-<g id="edge7" class="edge">
-<title>Node19&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1596.0377,-302.4331C1634.1817,-272.149 1716.631,-206.1673 1720,-199 1723.7814,-190.9555 1725.1815,-186.2225 1720,-179 1708.8257,-163.4243 1661.9814,-149.959 1626.3655,-141.6912"/>
-<polygon fill="#191970" stroke="#191970" points="1626.7165,-138.1826 1616.1916,-139.3977 1625.1771,-145.0113 1626.7165,-138.1826"/>
+<!-- Node3&#45;&gt;Node10 -->
+<g id="edge126" class="edge">
+<title>Node3&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2213.7501,-679.7482C2358.5597,-675.0401 2858.0645,-657.5104 3016.0432,-635 3054.893,-629.4643 3063.3179,-621.3483 3102.0432,-615 3184.0022,-601.5644 3410.8633,-632.908 3474.0432,-579 3590.8202,-479.3607 3531.4376,-360.5041 3443.0432,-235 3380.8025,-146.6292 3255.7188,-106.0885 3177.8213,-88.7329"/>
+<polygon fill="#191970" stroke="#191970" points="3178.1375,-85.2207 3167.6243,-86.5332 3176.6614,-92.0633 3178.1375,-85.2207"/>
 </g>
-<!-- Node21 -->
-<g id="node9" class="node">
-<title>Node21</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1772,-179.5 1772,-198.5 1830,-198.5 1830,-179.5 1772,-179.5"/>
-<text text-anchor="middle" x="1801" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
+<!-- Node3&#45;&gt;Node16 -->
+<g id="edge129" class="edge">
+<title>Node3&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2134.477,-679.4796C2002.2141,-674.2095 1582.6472,-655.8999 1563.0432,-635 1502.5326,-570.489 1542.742,-522.5348 1561.0432,-436 1581.2585,-340.4153 1591.1502,-312.9531 1650.0432,-235 1720.5691,-141.6493 1838.1457,-62.4095 1889.1619,-30.5207"/>
+<polygon fill="#191970" stroke="#191970" points="1891.223,-33.3614 1897.8869,-25.1247 1887.541,-27.4079 1891.223,-33.3614"/>
 </g>
-<!-- Node19&#45;&gt;Node21 -->
-<g id="edge8" class="edge">
-<title>Node19&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1610.1509,-302.4717C1631.0777,-292.6848 1658.5006,-279.304 1682,-266 1683.2224,-265.3079 1743.2942,-226.3957 1777.6057,-204.1618"/>
-<polygon fill="#191970" stroke="#191970" points="1779.6381,-207.0155 1786.1266,-198.64 1775.8312,-201.1411 1779.6381,-207.0155"/>
+<!-- Node17 -->
+<g id="node21" class="node">
+<title>Node17</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2897.5432,-6 2897.5432,-25 2966.5432,-25 2966.5432,-6 2897.5432,-6"/>
+<text text-anchor="middle" x="2932.0432" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
 </g>
-<!-- Node19&#45;&gt;Node16 -->
-<g id="edge9" class="edge">
-<title>Node19&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1513.7589,-316.0658C1415.544,-312.753 1220.9,-301.837 1060,-266 972.6007,-246.5337 952.2701,-234.3441 870,-199 803.3731,-170.3764 609.4267,-67.8812 537.9019,-29.8429"/>
-<polygon fill="#191970" stroke="#191970" points="539.5029,-26.7302 529.0309,-25.1217 536.2142,-32.9096 539.5029,-26.7302"/>
+<!-- Node3&#45;&gt;Node17 -->
+<g id="edge130" class="edge">
+<title>Node3&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M2213.7071,-678.8845C2325.1441,-672.7659 2639.5245,-654.255 2741.0432,-635 2983.6614,-588.9827 3063.1027,-595.2457 3267.0432,-456 3348.7204,-400.2328 3361.5734,-306.3075 3293.0432,-235 3203.3063,-141.6263 3152.1382,-155.1943 3042.0432,-87 3011.5801,-68.1307 2977.0155,-45.4502 2954.8955,-30.7693"/>
+<polygon fill="#191970" stroke="#191970" points="2956.7692,-27.812 2946.5045,-25.1876 2952.8921,-33.6403 2956.7692,-27.812"/>
 </g>
-<!-- Node19&#45;&gt;Node18 -->
-<g id="edge10" class="edge">
-<title>Node19&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1640.3078,-302.6675C1711.4661,-286.0332 1830.4629,-258.329 1933,-235 1993.6456,-221.2021 2064.4249,-205.4803 2105.2437,-196.4507"/>
-<polygon fill="#191970" stroke="#191970" points="2106.1872,-199.8267 2115.1958,-194.2504 2104.676,-192.9917 2106.1872,-199.8267"/>
+<!-- Node3&#45;&gt;Node33 -->
+<g id="edge125" class="edge">
+<title>Node3&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M2134.1536,-679.7292C2007.5935,-675.4741 1616.1244,-660.539 1492.0432,-635 1418.66,-619.8959 1384.9779,-633.0003 1333.0432,-579 1244.5504,-486.9875 1198.6653,-415.531 1257.0432,-302 1263.6033,-289.2423 1274.9519,-278.9411 1286.7249,-271.0012"/>
+<polygon fill="#191970" stroke="#191970" points="1288.6848,-273.9037 1295.31,-265.6358 1284.9749,-267.9676 1288.6848,-273.9037"/>
 </g>
-<!-- Node22 -->
-<g id="node12" class="node">
-<title>Node22</title>
-<g id="a_node12"><a xlink:href="runtime_2container_2base_8h.html" target="_top" xlink:title="Base utilities for common POD(plain old data) container types. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1254.5,-179.5 1254.5,-198.5 1309.5,-198.5 1309.5,-179.5 1254.5,-179.5"/>
-<text text-anchor="middle" x="1282" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
-</a>
+<!-- Node45 -->
+<g id="node36" class="node">
+<title>Node45</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2898.0432,-308 2898.0432,-327 2942.0432,-327 2942.0432,-308 2898.0432,-308"/>
+<text text-anchor="middle" x="2920.0432" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">limits</text>
 </g>
+<!-- Node3&#45;&gt;Node45 -->
+<g id="edge128" class="edge">
+<title>Node3&#45;&gt;Node45</title>
+<path fill="none" stroke="#191970" d="M2213.7682,-678.3235C2358.9152,-668.1602 2854.5696,-629.9433 2902.0432,-579 2965.3646,-511.0508 2939.1589,-385.2291 2925.9311,-337.0065"/>
+<polygon fill="#191970" stroke="#191970" points="2929.2444,-335.8629 2923.1247,-327.2142 2922.5153,-337.7915 2929.2444,-335.8629"/>
 </g>
-<!-- Node19&#45;&gt;Node22 -->
-<g id="edge11" class="edge">
-<title>Node19&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1559.0957,-302.2104C1536.6412,-283.7691 1496.3025,-253.009 1457,-235 1412.0324,-214.3951 1356.1772,-201.7349 1319.8392,-195.0413"/>
-<polygon fill="#191970" stroke="#191970" points="1320.1367,-191.5393 1309.6779,-193.2316 1318.9093,-198.4308 1320.1367,-191.5393"/>
+<!-- Node3&#45;&gt;Node49 -->
+<g id="edge117" class="edge">
+<title>Node3&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M2193.2038,-671.2455C2210.5544,-662.4125 2236.3333,-649.2887 2256.0015,-639.2758"/>
+<polygon fill="#191970" stroke="#191970" points="2257.7785,-642.2986 2265.1023,-634.6427 2254.6027,-636.0605 2257.7785,-642.2986"/>
 </g>
-<!-- Node22&#45;&gt;Node20 -->
-<g id="edge24" class="edge">
-<title>Node22&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1309.5109,-182.2059C1314.3292,-181.0858 1319.3028,-179.9738 1324,-179 1400.4262,-163.1551 1490.2063,-148.0382 1541.7672,-139.6903"/>
-<polygon fill="#191970" stroke="#191970" points="1542.5471,-143.1098 1551.8628,-138.0633 1541.4333,-136.199 1542.5471,-143.1098"/>
+<!-- Node4&#45;&gt;Node5 -->
+<g id="edge5" class="edge">
+<title>Node4&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M2239.137,-553.8906C2240.1073,-545.3657 2241.5559,-532.6392 2242.7755,-521.9235"/>
+<polygon fill="#191970" stroke="#191970" points="2246.2806,-522.0766 2243.9341,-511.7449 2239.3255,-521.2849 2246.2806,-522.0766"/>
 </g>
-<!-- Node22&#45;&gt;Node16 -->
-<g id="edge25" class="edge">
-<title>Node22&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1293.2589,-179.1032C1307.6252,-165.3031 1328.9266,-140.058 1314,-123 1262.211,-63.8158 688.0184,-25.9936 543.7408,-17.3804"/>
-<polygon fill="#191970" stroke="#191970" points="543.9209,-13.8851 533.7318,-16.7888 543.5078,-20.8729 543.9209,-13.8851"/>
+<!-- Node4&#45;&gt;Node10 -->
+<g id="edge115" class="edge">
+<title>Node4&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2278.6485,-562.0092C2445.8266,-555.4525 3080.8386,-526.3168 3267.0432,-456 3355.3015,-422.6709 3396.7824,-416.8382 3440.0432,-333 3477.8124,-259.8045 3223.582,-126.8184 3217.0432,-123 3194.734,-109.9721 3168.0411,-98.4904 3146.897,-90.2565"/>
+<polygon fill="#191970" stroke="#191970" points="3147.8988,-86.8926 3137.3081,-86.5956 3145.402,-93.4322 3147.8988,-86.8926"/>
+</g>
+<!-- Node4&#45;&gt;Node16 -->
+<g id="edge116" class="edge">
+<title>Node4&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2217.5612,-553.8671C2181.4863,-536.5079 2104.8692,-497.8656 2046.0432,-456 1938.1915,-379.2435 1914.0432,-321.3766 1914.0432,-189 1914.0432,-189 1914.0432,-189 1914.0432,-133 1914.0432,-98.6399 1914.0432,-58.628 1914.0432,-35.2764"/>
+<polygon fill="#191970" stroke="#191970" points="1917.5433,-35.2489 1914.0432,-25.2489 1910.5433,-35.249 1917.5433,-35.2489"/>
+</g>
+<!-- Node5&#45;&gt;Node6 -->
+<g id="edge6" class="edge">
+<title>Node5&#45;&gt;Node6</title>
+<path fill="none" stroke="#191970" d="M2294.6611,-500.6892C2425.9947,-496.9242 2793.4911,-484.4708 3098.0432,-456 3099.5146,-455.8624 3101.0015,-455.7177 3102.4998,-455.5666"/>
+<polygon fill="#191970" stroke="#191970" points="3102.9405,-459.0394 3112.5075,-454.4871 3102.1898,-452.0798 3102.9405,-459.0394"/>
 </g>
 <!-- Node7 -->
-<g id="node13" class="node">
+<g id="node8" class="node">
 <title>Node7</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1216.5,-123.5 1216.5,-142.5 1305.5,-142.5 1305.5,-123.5 1216.5,-123.5"/>
-<text text-anchor="middle" x="1261" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
+<g id="a_node8"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="2018.5432,-369.5 2018.5432,-399.5 2131.5432,-399.5 2131.5432,-369.5 2018.5432,-369.5"/>
+<text text-anchor="start" x="2026.5432" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="2075.0432" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+</a>
 </g>
-<!-- Node22&#45;&gt;Node7 -->
-<g id="edge12" class="edge">
-<title>Node22&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1278.3421,-179.2455C1275.5416,-171.7776 1271.591,-161.2427 1268.1676,-152.1137"/>
-<polygon fill="#191970" stroke="#191970" points="1271.4045,-150.777 1264.616,-142.6427 1264.8501,-153.2349 1271.4045,-150.777"/>
 </g>
-<!-- Node13 -->
-<g id="node14" class="node">
-<title>Node13</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1880.5,-6 1880.5,-25 2005.5,-25 2005.5,-6 1880.5,-6"/>
-<text text-anchor="middle" x="1943" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
+<!-- Node5&#45;&gt;Node7 -->
+<g id="edge106" class="edge">
+<title>Node5&#45;&gt;Node7</title>
+<path fill="none" stroke="#191970" d="M2231.1314,-492.3845C2203.7483,-473.4579 2142.301,-430.987 2105.1945,-405.3398"/>
+<polygon fill="#191970" stroke="#191970" points="2106.9859,-402.3233 2096.7695,-399.5167 2103.0058,-408.0817 2106.9859,-402.3233"/>
 </g>
-<!-- Node22&#45;&gt;Node13 -->
-<g id="edge13" class="edge">
-<title>Node22&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1298.3085,-179.4859C1323.4283,-165.2194 1373.4605,-138.3269 1419,-123 1613.7113,-57.4674 1670.9699,-73.3547 1872,-31 1877.7735,-29.7836 1883.807,-28.4943 1889.8128,-27.1995"/>
-<polygon fill="#191970" stroke="#191970" points="1890.8109,-30.5645 1899.8433,-25.0268 1889.329,-23.7232 1890.8109,-30.5645"/>
+<!-- Node5&#45;&gt;Node18 -->
+<g id="edge113" class="edge">
+<title>Node5&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M2195.2541,-498.8182C2082.3948,-491.5693 1798.4628,-473.1271 1561.0432,-456 1276.2181,-435.4532 304.0432,-474.5653 304.0432,-189 304.0432,-189 304.0432,-189 304.0432,-133 304.0432,-82.8957 464.7672,-39.4457 535.4027,-22.7694"/>
+<polygon fill="#191970" stroke="#191970" points="536.5862,-26.0878 545.5348,-20.4156 535.0021,-19.2694 536.5862,-26.0878"/>
 </g>
-<!-- Node22&#45;&gt;Node23 -->
-<g id="edge14" class="edge">
-<title>Node22&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1254.4106,-182.1333C1217.318,-172.9014 1150.7828,-156.3415 1105.2967,-145.0205"/>
-<polygon fill="#191970" stroke="#191970" points="1105.9087,-141.5661 1095.3594,-142.5472 1104.218,-148.3589 1105.9087,-141.5661"/>
+<!-- Node5&#45;&gt;Node20 -->
+<g id="edge114" class="edge">
+<title>Node5&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M2195.2841,-494.591C2141.48,-485.9978 2059.697,-471.0731 2032.0432,-456 2001.1378,-439.1545 1999.1181,-426.5794 1976.0432,-400 1915.9058,-330.7288 1920.2526,-296.3392 1852.0432,-235 1837.9481,-222.3245 1819.8466,-211.3023 1804.7362,-203.2273"/>
+<polygon fill="#191970" stroke="#191970" points="1806.267,-200.0787 1795.777,-198.5919 1803.0503,-206.2958 1806.267,-200.0787"/>
 </g>
-<!-- Node22&#45;&gt;Node8 -->
-<g id="edge23" class="edge">
-<title>Node22&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1308.0141,-179.4756C1348.7815,-164.944 1430.4634,-137.4095 1502,-123 1628.2926,-97.561 1778.372,-85.5905 1864.2743,-80.4405"/>
-<polygon fill="#191970" stroke="#191970" points="1864.648,-83.9247 1874.4262,-79.8459 1864.2386,-76.9367 1864.648,-83.9247"/>
+<!-- Node25 -->
+<g id="node17" class="node">
+<title>Node25</title>
+<g id="a_node17"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="2356.5432,-123.5 2356.5432,-142.5 2485.5432,-142.5 2485.5432,-123.5 2356.5432,-123.5"/>
+<text text-anchor="middle" x="2421.0432" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
+</a>
 </g>
-<!-- Node23&#45;&gt;Node16 -->
-<g id="edge22" class="edge">
-<title>Node23&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1022.423,-123.4981C972.4607,-109.9492 877.0648,-84.756 795,-67 704.5061,-47.4202 596.6957,-29.276 543.7072,-20.6949"/>
-<polygon fill="#191970" stroke="#191970" points="543.9891,-17.1952 533.5597,-19.0601 542.8757,-24.1061 543.9891,-17.1952"/>
 </g>
-<!-- Node23&#45;&gt;Node8 -->
-<g id="edge15" class="edge">
-<title>Node23&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1121.6004,-128.6454C1148.0679,-126.8737 1179.0021,-124.819 1207,-123 1448.1403,-107.3331 1734.3205,-89.427 1863.9679,-81.3523"/>
-<polygon fill="#191970" stroke="#191970" points="1864.3894,-84.8329 1874.1525,-80.7181 1863.9543,-77.8464 1864.3894,-84.8329"/>
+<!-- Node5&#45;&gt;Node25 -->
+<g id="edge109" class="edge">
+<title>Node5&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M2294.662,-497.8941C2441.4694,-484.6196 2868.5938,-437.5152 2951.0432,-333 3016.2213,-250.3784 2918.046,-295.537 2821.0432,-266 2686.2541,-224.9572 2527.9613,-170.4219 2457.7569,-145.9033"/>
+<polygon fill="#191970" stroke="#191970" points="2458.6461,-142.5065 2448.0513,-142.5088 2456.3351,-149.114 2458.6461,-142.5065"/>
 </g>
-<!-- Node23&#45;&gt;Node15 -->
-<g id="edge21" class="edge">
-<title>Node23&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1058.106,-123.1456C1059.0911,-113.8422 1060.4578,-99.4936 1061,-87 1061.3854,-78.1195 1062.1762,-75.8107 1061,-67 1059.5569,-56.1902 1056.4813,-44.4337 1053.5794,-34.8862"/>
-<polygon fill="#191970" stroke="#191970" points="1056.8632,-33.6646 1050.4731,-25.2137 1050.1984,-35.8049 1056.8632,-33.6646"/>
+<!-- Node5&#45;&gt;Node10 -->
+<g id="edge110" class="edge">
+<title>Node5&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2294.8152,-501.3301C2491.52,-498.4755 3205.8421,-486.0688 3243.0432,-456 3321.3633,-392.6959 3334.9591,-326.5669 3293.0432,-235 3261.1415,-165.3094 3181.6435,-114.6506 3138.6797,-91.3304"/>
+<polygon fill="#191970" stroke="#191970" points="3140.3195,-88.2383 3129.8456,-86.6419 3137.0379,-94.4215 3140.3195,-88.2383"/>
 </g>
-<!-- Node8&#45;&gt;Node16 -->
-<g id="edge20" class="edge">
-<title>Node8&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1874.2165,-74.4162C1632.0378,-63.9496 728.0823,-24.882 543.8891,-16.9214"/>
-<polygon fill="#191970" stroke="#191970" points="543.8911,-13.4183 533.7493,-16.4832 543.5888,-20.4118 543.8911,-13.4183"/>
+<!-- Node11 -->
+<g id="node19" class="node">
+<title>Node11</title>
+<g id="a_node19"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="3437.5432,-.5 3437.5432,-30.5 3566.5432,-30.5 3566.5432,-.5 3437.5432,-.5"/>
+<text text-anchor="start" x="3445.5432" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
+<text text-anchor="middle" x="3502.0432" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
+</a>
 </g>
-<!-- Node8&#45;&gt;Node13 -->
-<g id="edge17" class="edge">
-<title>Node8&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1935.4063,-67.3906C1936.6538,-58.8657 1938.5162,-46.1392 1940.0844,-35.4235"/>
-<polygon fill="#191970" stroke="#191970" points="1943.589,-35.6463 1941.5739,-25.2449 1936.6627,-34.6326 1943.589,-35.6463"/>
 </g>
-<!-- Node8&#45;&gt;Node9 -->
-<g id="edge16" class="edge">
-<title>Node8&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1912.75,-67.3906C1893.4919,-58.682 1864.5378,-45.5888 1840.5329,-34.7336"/>
-<polygon fill="#191970" stroke="#191970" points="1841.8232,-31.4759 1831.2693,-30.5446 1838.9389,-37.8541 1841.8232,-31.4759"/>
+<!-- Node5&#45;&gt;Node11 -->
+<g id="edge108" class="edge">
+<title>Node5&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M2294.6865,-500.8923C2486.3584,-496.4615 3172.1926,-479.1299 3267.0432,-456 3541.5682,-389.0554 3844.0432,-471.5695 3844.0432,-189 3844.0432,-189 3844.0432,-189 3844.0432,-133 3844.0432,-76.9311 3677.5542,-41.9026 3576.9009,-25.8488"/>
+<polygon fill="#191970" stroke="#191970" points="3577.1942,-22.3522 3566.7729,-24.2619 3576.1105,-29.2678 3577.1942,-22.3522"/>
 </g>
-<!-- Node8&#45;&gt;Node14 -->
-<g id="edge18" class="edge">
-<title>Node8&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1993.5313,-73.5395C2189.4776,-62.1495 2810.305,-26.0617 2959.5202,-17.388"/>
-<polygon fill="#191970" stroke="#191970" points="2960.0424,-20.8636 2969.8224,-16.7891 2959.6362,-13.8754 2960.0424,-20.8636"/>
+<!-- Node5&#45;&gt;Node16 -->
+<g id="edge111" class="edge">
+<title>Node5&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2195.509,-493.8539C2141.841,-482.3889 2057.5396,-456.2334 2010.0432,-400 1911.6916,-283.5562 1966.2601,-214.5523 1928.0432,-67 1925.3088,-56.4426 1922.1293,-44.7065 1919.4921,-35.1073"/>
+<polygon fill="#191970" stroke="#191970" points="1922.8342,-34.061 1916.7975,-25.3542 1916.087,-35.9252 1922.8342,-34.061"/>
 </g>
-<!-- Node8&#45;&gt;Node15 -->
-<g id="edge19" class="edge">
-<title>Node8&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1874.4241,-72.8693C1707.71,-61.3102 1239.0604,-28.8165 1091.8403,-18.609"/>
-<polygon fill="#191970" stroke="#191970" points="1092.0227,-15.1133 1081.8045,-17.9132 1091.5384,-22.0966 1092.0227,-15.1133"/>
+<!-- Node5&#45;&gt;Node17 -->
+<g id="edge112" class="edge">
+<title>Node5&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M2294.5933,-499.9556C2453.3609,-492.1762 2943.6545,-458.1975 3022.0432,-333 3054.5215,-281.1279 2970.1112,-94.7121 2941.2878,-34.4412"/>
+<polygon fill="#191970" stroke="#191970" points="2944.3744,-32.7839 2936.8769,-25.298 2938.0697,-35.8255 2944.3744,-32.7839"/>
 </g>
-<!-- Node26&#45;&gt;Node13 -->
-<g id="edge28" class="edge">
-<title>Node26&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2402.6994,-240.964C2401.1686,-210.104 2391.8404,-112.3355 2335,-67 2310.5345,-47.4865 2121.9964,-29.6398 2015.5627,-20.9878"/>
-<polygon fill="#191970" stroke="#191970" points="2015.776,-17.4937 2005.5272,-20.1795 2015.2139,-24.4711 2015.776,-17.4937"/>
+<!-- Node29 -->
+<g id="node25" class="node">
+<title>Node29</title>
+<g id="a_node25"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="2188.5432,-369.5 2188.5432,-399.5 2301.5432,-399.5 2301.5432,-369.5 2188.5432,-369.5"/>
+<text text-anchor="start" x="2196.5432" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="2245.0432" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
+</a>
 </g>
-<!-- Node26&#45;&gt;Node9 -->
-<g id="edge27" class="edge">
-<title>Node26&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M2398.0375,-240.6451C2381.4082,-208.8084 2324.1535,-108.3872 2244,-67 2172.7985,-30.2352 1969.1207,-42.5683 1872.9692,-30.8907"/>
-<polygon fill="#191970" stroke="#191970" points="1873.2804,-27.4008 1862.8986,-29.515 1872.3329,-34.3364 1873.2804,-27.4008"/>
 </g>
-<!-- Node26&#45;&gt;Node14 -->
-<g id="edge29" class="edge">
-<title>Node26&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2404.0647,-240.8321C2408.0712,-209.5681 2425.2638,-110.7024 2486,-67 2524.4587,-39.3272 2854.1001,-21.8562 2959.6397,-16.9321"/>
-<polygon fill="#191970" stroke="#191970" points="2959.8711,-20.4253 2969.6996,-16.4693 2959.5493,-13.4327 2959.8711,-20.4253"/>
+<!-- Node5&#45;&gt;Node29 -->
+<g id="edge107" class="edge">
+<title>Node5&#45;&gt;Node29</title>
+<path fill="none" stroke="#191970" d="M2245.0432,-492.3845C2245.0432,-474.544 2245.0432,-435.7839 2245.0432,-409.9138"/>
+<polygon fill="#191970" stroke="#191970" points="2248.5433,-409.7143 2245.0432,-399.7143 2241.5433,-409.7143 2248.5433,-409.7143"/>
 </g>
-<!-- Node26&#45;&gt;Node15 -->
-<g id="edge30" class="edge">
-<title>Node26&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M2390.8671,-240.7217C2361.1158,-217.191 2281.4532,-156.8573 2206,-123 2120.6104,-84.684 2095.3744,-82.0489 2003,-67 1824.1142,-37.8574 1256.2114,-20.9927 1091.9233,-16.6365"/>
-<polygon fill="#191970" stroke="#191970" points="1091.7826,-13.1317 1081.694,-16.3677 1091.5986,-20.1293 1091.7826,-13.1317"/>
+<!-- Node6&#45;&gt;Node7 -->
+<g id="edge7" class="edge">
+<title>Node6&#45;&gt;Node7</title>
+<path fill="none" stroke="#191970" d="M3112.4768,-444.6416C2921.7033,-440.1492 2334.7427,-424.6411 2146.0432,-400 2144.7414,-399.83 2143.428,-399.6482 2142.1063,-399.4559"/>
+<polygon fill="#191970" stroke="#191970" points="2142.2911,-395.9402 2131.8599,-397.7952 2141.1711,-402.85 2142.2911,-395.9402"/>
 </g>
-<!-- Node27&#45;&gt;Node14 -->
-<g id="edge59" class="edge">
-<title>Node27&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2316.0966,-369.3848C2366.1165,-352.3883 2439.4094,-319.8766 2481,-266 2502.5393,-238.0979 2500,-224.2487 2500,-189 2500,-189 2500,-189 2500,-133 2500,-88.5978 2534.498,-85.1973 2575,-67 2643.9141,-36.0373 2874.3206,-21.4634 2959.7596,-17.0282"/>
-<polygon fill="#191970" stroke="#191970" points="2960.0927,-20.5159 2969.9028,-16.5143 2959.7384,-13.5249 2960.0927,-20.5159"/>
+<!-- Node6&#45;&gt;Node20 -->
+<g id="edge105" class="edge">
+<title>Node6&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M3112.5328,-445.3586C2910.5016,-442.9192 2267.0448,-432.6433 2180.0432,-400 2159.3124,-392.2217 2160.17,-380.1551 2141.0432,-369 2104.1849,-347.5034 2090.1968,-352.1036 2052.0432,-333 1976.7991,-295.3251 1965.3602,-272.5293 1890.0432,-235 1863.2668,-221.6577 1831.6294,-209.2315 1808.3227,-200.6662"/>
+<polygon fill="#191970" stroke="#191970" points="1809.4767,-197.3617 1798.8826,-197.2403 1807.0887,-203.9418 1809.4767,-197.3617"/>
 </g>
-<!-- Node27&#45;&gt;Node26 -->
-<g id="edge33" class="edge">
-<title>Node27&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2278.7876,-369.389C2305.2126,-344.0965 2358.1951,-293.3847 2385.6362,-267.1196"/>
-<polygon fill="#191970" stroke="#191970" points="2388.1617,-269.5472 2392.9658,-260.1042 2383.3215,-264.4903 2388.1617,-269.5472"/>
+<!-- Node6&#45;&gt;Node25 -->
+<g id="edge69" class="edge">
+<title>Node6&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M3158.942,-436.3408C3145.671,-427.1752 3125.3115,-412.9199 3108.0432,-400 3051.7986,-357.9184 3044.7698,-337.3073 2984.0432,-302 2858.0412,-228.7406 2820.1135,-219.2963 2680.0432,-179 2619.1474,-161.4811 2548.1791,-149.4397 2495.9451,-142.0845"/>
+<polygon fill="#191970" stroke="#191970" points="2496.1418,-138.5785 2485.7566,-140.6757 2495.183,-145.5125 2496.1418,-138.5785"/>
 </g>
-<!-- Node27&#45;&gt;Node28 -->
-<g id="edge34" class="edge">
-<title>Node27&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2206.3251,-376.3515C2121.8086,-364.2 1962.752,-341.3313 1869.7537,-327.9603"/>
-<polygon fill="#191970" stroke="#191970" points="1869.9771,-324.4565 1859.5808,-326.4977 1868.9809,-331.3853 1869.9771,-324.4565"/>
+<!-- Node6&#45;&gt;Node10 -->
+<g id="edge71" class="edge">
+<title>Node6&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M3175.606,-436.4052C3181.4681,-412.5287 3193.8828,-348.9842 3174.0432,-302 3165.118,-280.8633 3147.4444,-286.4508 3137.0432,-266 3108.4348,-209.7496 3107.4456,-132.6273 3108.8175,-96.793"/>
+<polygon fill="#191970" stroke="#191970" points="3112.3189,-96.8478 3109.3138,-86.6881 3105.3274,-96.5043 3112.3189,-96.8478"/>
 </g>
-<!-- Node28&#45;&gt;Node16 -->
-<g id="edge57" class="edge">
-<title>Node28&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1734.3882,-310.5297C1708.0413,-307.7039 1677.0471,-304.5199 1649,-302 1435.9258,-282.8563 1379.5953,-303.6396 1169,-266 1062.5084,-246.9668 1036.6044,-236.1385 935,-199 800.4726,-149.8274 772.9484,-122.724 641,-67 607.8838,-53.0145 569.5815,-37.9768 543.1736,-27.7838"/>
-<polygon fill="#191970" stroke="#191970" points="544.2603,-24.4518 533.6705,-24.1267 541.7462,-30.9848 544.2603,-24.4518"/>
+<!-- Node6&#45;&gt;Node11 -->
+<g id="edge67" class="edge">
+<title>Node6&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M3187.4881,-436.4331C3237.2137,-402.7156 3402.7638,-283.8181 3485.0432,-143 3503.1273,-112.0499 3515.4754,-102.6813 3512.0432,-67 3511.2093,-58.3305 3509.6031,-48.952 3507.9179,-40.6039"/>
+<polygon fill="#191970" stroke="#191970" points="3511.3152,-39.7535 3505.7933,-30.7114 3504.4713,-41.2235 3511.3152,-39.7535"/>
 </g>
-<!-- Node28&#45;&gt;Node18 -->
-<g id="edge58" class="edge">
-<title>Node28&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1822.4262,-307.9466C1883.3631,-285.0507 2037.7262,-227.0517 2105.8381,-201.4599"/>
-<polygon fill="#191970" stroke="#191970" points="2107.1102,-204.721 2115.2402,-197.9273 2104.6481,-198.1682 2107.1102,-204.721"/>
+<!-- Node6&#45;&gt;Node16 -->
+<g id="edge103" class="edge">
+<title>Node6&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M3164.8171,-436.2987C3127.1967,-392.5777 2965.9696,-212.1232 2794.0432,-123 2774.5036,-112.871 2625.6944,-70.9524 2604.0432,-67 2475.1244,-43.466 2064.7352,-22.6377 1946.3177,-16.9984"/>
+<polygon fill="#191970" stroke="#191970" points="1946.2526,-13.4915 1936.0983,-16.515 1945.9218,-20.4837 1946.2526,-13.4915"/>
 </g>
-<!-- Node28&#45;&gt;Node8 -->
-<g id="edge56" class="edge">
-<title>Node28&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1803.5399,-307.8585C1816.9756,-287.9055 1848.4036,-240.4784 1872,-199 1892.3534,-163.2222 1913.5542,-119.9941 1925.1336,-95.786"/>
-<polygon fill="#191970" stroke="#191970" points="1928.343,-97.1868 1929.4766,-86.6528 1922.0213,-94.1807 1928.343,-97.1868"/>
+<!-- Node6&#45;&gt;Node17 -->
+<g id="edge104" class="edge">
+<title>Node6&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M3171.8311,-436.4999C3168.5559,-412.8397 3158.2396,-349.7498 3136.0432,-302 3108.8531,-243.5075 2987.5939,-86.421 2946.0563,-33.3209"/>
+<polygon fill="#191970" stroke="#191970" points="2948.6053,-30.8993 2939.6814,-25.1884 2943.0962,-35.2179 2948.6053,-30.8993"/>
 </g>
-<!-- Node28&#45;&gt;Node9 -->
-<g id="edge35" class="edge">
-<title>Node28&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1800.4868,-307.9437C1810.3887,-280.7694 1838.0289,-204.6467 1839,-199 1848.8696,-141.6101 1825.0752,-75.0986 1809.7411,-40.0456"/>
-<polygon fill="#191970" stroke="#191970" points="1812.7924,-38.2981 1805.4879,-30.6238 1806.4123,-41.1782 1812.7924,-38.2981"/>
+<!-- Node28 -->
+<g id="node24" class="node">
+<title>Node28</title>
+<g id="a_node24"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
+<polygon fill="#ffffff" stroke="#000000" points="3146.0432,-241 3146.0432,-260 3284.0432,-260 3284.0432,-241 3146.0432,-241"/>
+<text text-anchor="middle" x="3215.0432" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
+</a>
 </g>
-<!-- Node28&#45;&gt;Node26 -->
-<g id="edge55" class="edge">
-<title>Node28&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1859.5702,-310.5822C1970.607,-298.3058 2201.8686,-272.7373 2323.8122,-259.2551"/>
-<polygon fill="#191970" stroke="#191970" points="2324.2776,-262.7251 2333.8323,-258.1472 2323.5083,-255.7674 2324.2776,-262.7251"/>
 </g>
-<!-- Node28&#45;&gt;Node30 -->
+<!-- Node6&#45;&gt;Node28 -->
+<g id="edge68" class="edge">
+<title>Node6&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M3184.7074,-436.4266C3194.052,-427.9573 3206.4865,-414.6892 3212.0432,-400 3228.6661,-356.0573 3223.2855,-299.5296 3218.7042,-270.1715"/>
+<polygon fill="#191970" stroke="#191970" points="3222.1031,-269.2808 3216.9754,-260.0095 3215.2022,-270.4549 3222.1031,-269.2808"/>
+</g>
+<!-- Node6&#45;&gt;Node29 -->
 <g id="edge36" class="edge">
-<title>Node28&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M1753.9774,-307.9673C1742.9442,-305.763 1731.0669,-303.5983 1720,-302 1543.5167,-276.5114 1494.3197,-292.5565 1314.2105,-266.0602"/>
-<polygon fill="#191970" stroke="#191970" points="1314.5422,-262.571 1304.1346,-264.554 1313.5072,-269.4941 1314.5422,-262.571"/>
+<title>Node6&#45;&gt;Node29</title>
+<path fill="none" stroke="#191970" d="M3112.2981,-445.1168C2973.8218,-442.569 2624.282,-433.1137 2334.0432,-400 2326.8654,-399.1811 2319.3892,-398.155 2311.9543,-397.0229"/>
+<polygon fill="#191970" stroke="#191970" points="2312.2986,-393.5338 2301.8735,-395.4229 2311.2012,-400.4473 2312.2986,-393.5338"/>
 </g>
-<!-- Node31 -->
-<g id="node24" class="node">
-<title>Node31</title>
-<g id="a_node24"><a xlink:href="string_8h.html" target="_top" xlink:title="Runtime String container types. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1322,-235.5 1322,-265.5 1448,-265.5 1448,-235.5 1322,-235.5"/>
-<text text-anchor="start" x="1330" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1385" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
+<!-- Node30 -->
+<g id="node26" class="node">
+<title>Node30</title>
+<g id="a_node26"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="2170.5432,-308 2170.5432,-327 2295.5432,-327 2295.5432,-308 2170.5432,-308"/>
+<text text-anchor="middle" x="2233.0432" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
 </a>
 </g>
 </g>
-<!-- Node28&#45;&gt;Node31 -->
-<g id="edge40" class="edge">
-<title>Node28&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1751.4641,-307.9723C1741.136,-305.912 1730.2042,-303.8104 1720,-302 1630.1721,-286.0629 1526.2755,-270.5403 1458.2987,-260.7755"/>
-<polygon fill="#191970" stroke="#191970" points="1458.5159,-257.271 1448.1207,-259.318 1457.5235,-264.2003 1458.5159,-257.271"/>
-</g>
-<!-- Node30&#45;&gt;Node16 -->
-<g id="edge37" class="edge">
-<title>Node30&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1177.9879,-242.621C1125.1965,-234.9028 1048.3604,-221.0417 984,-199 921.7587,-177.6841 780.7466,-89.7092 719,-67 658.7767,-44.851 585.2816,-29.1694 543.7384,-21.295"/>
-<polygon fill="#191970" stroke="#191970" points="544.1173,-17.8055 533.6458,-19.417 542.8367,-24.6874 544.1173,-17.8055"/>
-</g>
-<!-- Node30&#45;&gt;Node18 -->
-<g id="edge38" class="edge">
-<title>Node30&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1304.1178,-236.3281C1307.1134,-235.8394 1310.0854,-235.3924 1313,-235 1470.1235,-213.8446 1970.5508,-194.9667 2105.3052,-190.1717"/>
-<polygon fill="#191970" stroke="#191970" points="2105.5646,-193.6648 2115.4346,-189.8134 2105.317,-186.6692 2105.5646,-193.6648"/>
+<!-- Node6&#45;&gt;Node30 -->
+<g id="edge70" class="edge">
+<title>Node6&#45;&gt;Node30</title>
+<path fill="none" stroke="#191970" d="M3112.332,-443.9671C2939.89,-437.9803 2455.1279,-419.6007 2386.0432,-400 2335.4831,-385.6551 2282.9073,-352.6477 2254.4568,-333.0079"/>
+<polygon fill="#191970" stroke="#191970" points="2256.296,-330.0227 2246.1004,-327.1416 2252.274,-335.7519 2256.296,-330.0227"/>
 </g>
-<!-- Node30&#45;&gt;Node22 -->
-<g id="edge39" class="edge">
-<title>Node30&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1251.1348,-235.2977C1256.7668,-226.8498 1263.8151,-216.2773 1269.754,-207.369"/>
-<polygon fill="#191970" stroke="#191970" points="1272.8005,-209.1089 1275.4354,-198.8469 1266.9762,-205.226 1272.8005,-209.1089"/>
+<!-- Node6&#45;&gt;Node41 -->
+<g id="edge72" class="edge">
+<title>Node6&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M3112.277,-440.8095C2978.9843,-429.4241 2661.6772,-402.3208 2521.5597,-390.3524"/>
+<polygon fill="#191970" stroke="#191970" points="2521.6406,-386.8467 2511.3789,-389.4828 2521.0448,-393.8213 2521.6406,-386.8467"/>
 </g>
-<!-- Node31&#45;&gt;Node20 -->
-<g id="edge46" class="edge">
-<title>Node31&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1448.157,-236.1823C1474.993,-228.1202 1505.6399,-216.141 1530,-199 1547.878,-186.4202 1563.0971,-166.3953 1572.777,-151.6987"/>
-<polygon fill="#191970" stroke="#191970" points="1575.9272,-153.2655 1578.3104,-142.9422 1570.0096,-149.526 1575.9272,-153.2655"/>
+<!-- Node7&#45;&gt;Node21 -->
+<g id="edge8" class="edge">
+<title>Node7&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M2018.3597,-379.4091C1884.9584,-367.428 1551.2484,-337.4568 1402.5444,-324.1013"/>
+<polygon fill="#191970" stroke="#191970" points="1402.4358,-320.5776 1392.1628,-323.1689 1401.8096,-327.5495 1402.4358,-320.5776"/>
 </g>
-<!-- Node31&#45;&gt;Node21 -->
-<g id="edge49" class="edge">
-<title>Node31&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1448.3897,-237.4066C1452.9895,-236.5588 1457.5625,-235.7469 1462,-235 1570.188,-216.7898 1698.7449,-200.9096 1761.95,-193.4778"/>
-<polygon fill="#191970" stroke="#191970" points="1762.3577,-196.954 1771.8835,-192.3162 1761.5447,-190.0014 1762.3577,-196.954"/>
+<!-- Node7&#45;&gt;Node16 -->
+<g id="edge35" class="edge">
+<title>Node7&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2066.215,-369.3029C2060.9522,-359.3419 2054.7787,-345.8331 2052.0432,-333 2049.171,-319.5249 2051.1946,-315.7516 2052.0432,-302 2055.1603,-251.4899 2066.0432,-239.6062 2066.0432,-189 2066.0432,-189 2066.0432,-189 2066.0432,-133 2066.0432,-72.8142 1990.5253,-39.0962 1945.9487,-24.4359"/>
+<polygon fill="#191970" stroke="#191970" points="1946.7348,-21.0142 1936.1454,-21.3534 1944.635,-27.6918 1946.7348,-21.0142"/>
 </g>
-<!-- Node31&#45;&gt;Node16 -->
-<g id="edge52" class="edge">
-<title>Node31&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1321.7973,-236.8273C1318.8244,-236.2051 1315.8804,-235.5933 1313,-235 1231.8123,-218.2768 1210.1392,-220.1852 1130,-199 978.2242,-158.8772 940.0126,-147.1422 795,-87 776.6402,-79.3855 773.7658,-73.5502 755,-67 681.6566,-41.3993 591.2589,-26.3858 543.7439,-19.6949"/>
-<polygon fill="#191970" stroke="#191970" points="544.1083,-16.2122 533.725,-18.3187 543.1557,-23.1471 544.1083,-16.2122"/>
+<!-- Node7&#45;&gt;Node28 -->
+<g id="edge30" class="edge">
+<title>Node7&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M2131.6555,-375.6917C2147.3117,-373.3788 2164.3204,-370.9799 2180.0432,-369 2333.0011,-349.7387 2384.8517,-397.1362 2525.0432,-333 2543.3423,-324.6284 2539.5929,-310.0325 2558.0432,-302 2667.4995,-254.3474 2975.2277,-277.5895 3094.0432,-266 3107.6974,-264.6681 3122.1888,-263.0179 3136.2197,-261.2956"/>
+<polygon fill="#191970" stroke="#191970" points="3136.8471,-264.7446 3146.3363,-260.0324 3135.9796,-257.7985 3136.8471,-264.7446"/>
 </g>
-<!-- Node31&#45;&gt;Node18 -->
-<g id="edge54" class="edge">
-<title>Node31&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1448.3384,-237.0672C1452.9502,-236.2991 1457.5399,-235.5978 1462,-235 1707.3098,-202.1193 2005.9496,-192.2259 2105.1644,-189.7351"/>
-<polygon fill="#191970" stroke="#191970" points="2105.291,-193.2331 2115.2032,-189.4915 2105.1212,-186.2352 2105.291,-193.2331"/>
+<!-- Node21&#45;&gt;Node22 -->
+<g id="edge9" class="edge">
+<title>Node21&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M1355.3953,-302.3871C1370.001,-293.1467 1387.8241,-280.3599 1401.0432,-266 1424.5318,-240.4843 1431.8225,-232.4327 1441.0432,-199 1443.4066,-190.431 1444.1047,-187.345 1441.0432,-179 1436.9864,-167.9418 1428.9636,-157.7193 1421.2438,-149.7335"/>
+<polygon fill="#191970" stroke="#191970" points="1423.413,-146.9632 1413.7833,-142.5451 1418.556,-152.0041 1423.413,-146.9632"/>
 </g>
-<!-- Node31&#45;&gt;Node22 -->
-<g id="edge42" class="edge">
-<title>Node31&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1359.8034,-235.4554C1343.6728,-225.8241 1322.8362,-213.3828 1306.7864,-203.7996"/>
-<polygon fill="#191970" stroke="#191970" points="1308.3416,-200.6518 1297.9613,-198.5303 1304.753,-206.662 1308.3416,-200.6518"/>
+<!-- Node23 -->
+<g id="node11" class="node">
+<title>Node23</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1817.0432,-179.5 1817.0432,-198.5 1875.0432,-198.5 1875.0432,-179.5 1817.0432,-179.5"/>
+<text text-anchor="middle" x="1846.0432" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
 </g>
-<!-- Node31&#45;&gt;Node7 -->
-<g id="edge41" class="edge">
-<title>Node31&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1324.0216,-235.4964C1290.8605,-225.9711 1254.6688,-212.8589 1245,-199 1235.2435,-185.0154 1241.6173,-165.7673 1249.0098,-151.6661"/>
-<polygon fill="#191970" stroke="#191970" points="1252.1294,-153.263 1254.1048,-142.854 1246.0694,-149.7592 1252.1294,-153.263"/>
+<!-- Node21&#45;&gt;Node23 -->
+<g id="edge10" class="edge">
+<title>Node21&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M1390.5794,-302.4114C1479.769,-280.5052 1650.4282,-238.4501 1806.8564,-199.0326"/>
+<polygon fill="#191970" stroke="#191970" points="1807.8213,-202.3989 1816.6623,-196.5604 1806.11,-195.6113 1807.8213,-202.3989"/>
 </g>
-<!-- Node31&#45;&gt;Node13 -->
-<g id="edge43" class="edge">
-<title>Node31&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1441.6416,-235.4601C1474.7918,-226.1099 1517.2289,-213.1896 1554,-199 1693.8586,-145.0302 1853.5921,-62.8565 1916.0152,-29.9062"/>
-<polygon fill="#191970" stroke="#191970" points="1917.9812,-32.8256 1925.1803,-25.0524 1914.705,-26.6396 1917.9812,-32.8256"/>
+<!-- Node21&#45;&gt;Node18 -->
+<g id="edge11" class="edge">
+<title>Node21&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1265.9893,-311.5371C1177.1781,-302.6307 1020.0489,-284.8693 966.0432,-266 804.7231,-209.6354 641.1464,-77.9898 586.8377,-31.8381"/>
+<polygon fill="#191970" stroke="#191970" points="588.9482,-29.0374 579.0767,-25.1897 584.3941,-34.3535 588.9482,-29.0374"/>
 </g>
-<!-- Node31&#45;&gt;Node23 -->
-<g id="edge44" class="edge">
-<title>Node31&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1321.7293,-235.9816C1264.7351,-222.7521 1187.8292,-204.4675 1174,-199 1139.4983,-185.3594 1102.6961,-163.1855 1079.7755,-148.351"/>
-<polygon fill="#191970" stroke="#191970" points="1081.4151,-145.2406 1071.1346,-142.679 1077.5738,-151.0925 1081.4151,-145.2406"/>
+<!-- Node21&#45;&gt;Node20 -->
+<g id="edge12" class="edge">
+<title>Node21&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1357.989,-302.3443C1394.7083,-283.7467 1460.4402,-252.5313 1520.0432,-235 1613.8329,-207.4133 1641.446,-219.462 1737.0432,-199 1738.5025,-198.6877 1739.9898,-198.3557 1741.4897,-198.0097"/>
+<polygon fill="#191970" stroke="#191970" points="1742.5264,-201.3594 1751.408,-195.583 1740.8627,-194.5599 1742.5264,-201.3594"/>
 </g>
-<!-- Node31&#45;&gt;Node8 -->
-<g id="edge45" class="edge">
-<title>Node31&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1432.5704,-235.4868C1453.6799,-227.1475 1477.8041,-215.1426 1496,-199 1525.7089,-172.6435 1509.5003,-144.3319 1543,-123 1569.468,-106.1457 1759.1544,-89.8019 1864.2789,-81.9087"/>
-<polygon fill="#191970" stroke="#191970" points="1864.7777,-85.3813 1874.4903,-81.1486 1864.258,-78.4006 1864.7777,-85.3813"/>
+<!-- Node24 -->
+<g id="node14" class="node">
+<title>Node24</title>
+<g id="a_node14"><a xlink:href="runtime_2container_2base_8h.html" target="_top" xlink:title="Base utilities for common POD(plain old data) container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1160.5432,-179.5 1160.5432,-198.5 1215.5432,-198.5 1215.5432,-179.5 1160.5432,-179.5"/>
+<text text-anchor="middle" x="1188.0432" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
+</a>
 </g>
-<!-- Node31&#45;&gt;Node14 -->
-<g id="edge50" class="edge">
-<title>Node31&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1448.1047,-238.0026C1523.0729,-223.2859 1651.9748,-198.3912 1763,-179 2072.9376,-124.8676 2149.9395,-107.1228 2462,-67 2650.2017,-42.8022 2877.3172,-24.3005 2959.9639,-17.9162"/>
-<polygon fill="#191970" stroke="#191970" points="2960.2809,-21.4023 2969.9836,-17.1471 2959.7451,-14.4228 2960.2809,-21.4023"/>
 </g>
-<!-- Node31&#45;&gt;Node15 -->
-<g id="edge53" class="edge">
-<title>Node31&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1322.928,-235.4309C1295.3729,-227.1332 1263.3014,-215.1774 1237,-199 1160.6601,-152.045 1090.144,-69.8357 1061.0522,-33.5667"/>
-<polygon fill="#191970" stroke="#191970" points="1063.5235,-31.0497 1054.5702,-25.385 1058.0367,-35.3967 1063.5235,-31.0497"/>
+<!-- Node21&#45;&gt;Node24 -->
+<g id="edge13" class="edge">
+<title>Node21&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M1305.3654,-302.4843C1290.8235,-292.8055 1272.1531,-279.5363 1257.0432,-266 1236.2868,-247.4052 1215.3291,-223.0451 1202.048,-206.7511"/>
+<polygon fill="#191970" stroke="#191970" points="1204.5024,-204.218 1195.5094,-198.6166 1199.0465,-208.6036 1204.5024,-204.218"/>
 </g>
-<!-- Node32 -->
-<g id="node25" class="node">
-<title>Node32</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1434.5,-179.5 1434.5,-198.5 1487.5,-198.5 1487.5,-179.5 1434.5,-179.5"/>
-<text text-anchor="middle" x="1461" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstddef</text>
+<!-- Node24&#45;&gt;Node22 -->
+<g id="edge27" class="edge">
+<title>Node24&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M1215.5819,-181.7936C1252.1598,-172.2218 1317.12,-155.2229 1359.8888,-144.0311"/>
+<polygon fill="#191970" stroke="#191970" points="1360.9328,-147.3758 1369.721,-141.4582 1359.1607,-140.6038 1360.9328,-147.3758"/>
 </g>
-<!-- Node31&#45;&gt;Node32 -->
-<g id="edge47" class="edge">
-<title>Node31&#45;&gt;Node32</title>
-<path fill="none" stroke="#191970" d="M1403.7865,-235.2977C1415.1053,-226.1384 1429.5105,-214.4816 1441.0346,-205.1562"/>
-<polygon fill="#191970" stroke="#191970" points="1443.2595,-207.8583 1448.8314,-198.8469 1438.8561,-202.4167 1443.2595,-207.8583"/>
+<!-- Node24&#45;&gt;Node18 -->
+<g id="edge29" class="edge">
+<title>Node24&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1190.4519,-179.057C1193.2759,-164.7329 1195.9655,-138.3322 1182.0432,-123 1103.0304,-35.9851 716.0422,-19.2395 600.6063,-16.1629"/>
+<polygon fill="#191970" stroke="#191970" points="600.6911,-12.664 590.6067,-15.913 600.5161,-19.6618 600.6911,-12.664"/>
 </g>
-<!-- Node33 -->
-<g id="node26" class="node">
-<title>Node33</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1366,-179.5 1366,-198.5 1416,-198.5 1416,-179.5 1366,-179.5"/>
-<text text-anchor="middle" x="1391" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstring</text>
+<!-- Node9 -->
+<g id="node15" class="node">
+<title>Node9</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1084.5432,-123.5 1084.5432,-142.5 1173.5432,-142.5 1173.5432,-123.5 1084.5432,-123.5"/>
+<text text-anchor="middle" x="1129.0432" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
 </g>
-<!-- Node31&#45;&gt;Node33 -->
-<g id="edge48" class="edge">
-<title>Node31&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1386.4831,-235.2977C1387.2553,-227.3834 1388.2093,-217.6043 1389.0414,-209.0759"/>
-<polygon fill="#191970" stroke="#191970" points="1392.5517,-209.1396 1390.0393,-198.8469 1385.5848,-208.4598 1392.5517,-209.1396"/>
+<!-- Node24&#45;&gt;Node9 -->
+<g id="edge14" class="edge">
+<title>Node24&#45;&gt;Node9</title>
+<path fill="none" stroke="#191970" d="M1177.7662,-179.2455C1169.2214,-171.1352 1156.8654,-159.4075 1146.7347,-149.7919"/>
+<polygon fill="#191970" stroke="#191970" points="1148.8651,-146.9884 1139.2025,-142.6427 1144.0461,-152.0655 1148.8651,-146.9884"/>
 </g>
-<!-- Node34 -->
-<g id="node27" class="node">
-<title>Node34</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="572.5,-179.5 572.5,-198.5 665.5,-198.5 665.5,-179.5 572.5,-179.5"/>
-<text text-anchor="middle" x="619" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_map</text>
+<!-- Node15 -->
+<g id="node16" class="node">
+<title>Node15</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3047.5432,-6 3047.5432,-25 3172.5432,-25 3172.5432,-6 3047.5432,-6"/>
+<text text-anchor="middle" x="3110.0432" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
 </g>
-<!-- Node31&#45;&gt;Node34 -->
-<g id="edge51" class="edge">
-<title>Node31&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M1321.877,-236.3663C1318.8828,-235.8674 1315.9125,-235.4078 1313,-235 1077.9369,-202.0869 794.6874,-192.5972 675.8391,-189.9649"/>
-<polygon fill="#191970" stroke="#191970" points="675.6057,-186.4592 665.5333,-189.7448 675.4562,-193.4576 675.6057,-186.4592"/>
+<!-- Node24&#45;&gt;Node15 -->
+<g id="edge15" class="edge">
+<title>Node24&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M1215.9061,-186.5332C1238.7772,-184.5086 1272.0083,-181.5675 1301.0432,-179 2045.4832,-113.1715 2232.5558,-106.8328 2976.0432,-31 2995.8831,-28.9764 3017.3509,-26.584 3037.1685,-24.2921"/>
+<polygon fill="#191970" stroke="#191970" points="3037.8358,-27.7381 3047.364,-23.1052 3037.0263,-20.7851 3037.8358,-27.7381"/>
 </g>
-<!-- Node47&#45;&gt;Node14 -->
-<g id="edge135" class="edge">
-<title>Node47&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1498.5743,-892.0755C1736.3508,-882.5588 2743.9027,-836.8857 3040,-736 3177.8117,-689.045 3315,-703.5913 3315,-558 3315,-558 3315,-558 3315,-133 3315,-98.6999 3303.2514,-86.4513 3275,-67 3234.5756,-39.1675 3089.3925,-23.7144 3024.1499,-18.0387"/>
-<polygon fill="#191970" stroke="#191970" points="3024.2886,-14.538 3014.0288,-17.1812 3023.6977,-21.513 3024.2886,-14.538"/>
+<!-- Node24&#45;&gt;Node25 -->
+<g id="edge16" class="edge">
+<title>Node24&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M1215.8781,-186.1679C1238.733,-183.9313 1271.956,-180.8833 1301.0432,-179 1505.2339,-165.7792 2129.6906,-143.2398 2346.1932,-135.6118"/>
+<polygon fill="#191970" stroke="#191970" points="2346.3205,-139.1095 2356.1912,-135.2599 2346.0742,-132.1139 2346.3205,-139.1095"/>
 </g>
-<!-- Node47&#45;&gt;Node28 -->
-<g id="edge109" class="edge">
-<title>Node47&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1397.3612,-892.7441C1239.4168,-888.5705 762.4857,-873.9387 698,-848 656.495,-831.3051 605.8841,-808.3228 632,-772 843.8433,-477.3621 1070.7526,-650.9182 1406,-512 1547.2803,-453.457 1708.6671,-366.4182 1770.765,-332.1339"/>
-<polygon fill="#191970" stroke="#191970" points="1772.8081,-335.0034 1779.8606,-327.0968 1769.4168,-328.8797 1772.8081,-335.0034"/>
+<!-- Node24&#45;&gt;Node10 -->
+<g id="edge26" class="edge">
+<title>Node24&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1194.9274,-179.0471C1206.2334,-163.6678 1230.3656,-134.6837 1259.0432,-123 1342.8828,-88.8425 2712.8624,-79.1786 3040.2009,-77.3522"/>
+<polygon fill="#191970" stroke="#191970" points="3040.5121,-80.8506 3050.4926,-77.2955 3040.4735,-73.8507 3040.5121,-80.8506"/>
 </g>
-<!-- Node48 -->
-<g id="node30" class="node">
-<title>Node48</title>
-<g id="a_node30"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1182,-716.5 1182,-735.5 1280,-735.5 1280,-716.5 1182,-716.5"/>
-<text text-anchor="middle" x="1231" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/function.h</text>
-</a>
+<!-- Node27 -->
+<g id="node23" class="node">
+<title>Node27</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1268.5432,-123.5 1268.5432,-142.5 1351.5432,-142.5 1351.5432,-123.5 1268.5432,-123.5"/>
+<text text-anchor="middle" x="1310.0432" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
 </g>
+<!-- Node24&#45;&gt;Node27 -->
+<g id="edge28" class="edge">
+<title>Node24&#45;&gt;Node27</title>
+<path fill="none" stroke="#191970" d="M1209.0158,-179.3733C1228.6,-170.3838 1258.0394,-156.8706 1280.0942,-146.7471"/>
+<polygon fill="#191970" stroke="#191970" points="1281.7162,-149.8538 1289.3444,-142.5011 1278.796,-143.4919 1281.7162,-149.8538"/>
 </g>
-<!-- Node47&#45;&gt;Node48 -->
-<g id="edge80" class="edge">
-<title>Node47&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1423.1419,-884.4239C1386.37,-869.2909 1316.8904,-836.8884 1271,-792 1257.1267,-778.4296 1246.1473,-759.0871 1239.2283,-744.8484"/>
-<polygon fill="#191970" stroke="#191970" points="1242.2975,-743.1465 1234.9245,-735.5379 1235.9435,-746.0837 1242.2975,-743.1465"/>
+<!-- Node25&#45;&gt;Node18 -->
+<g id="edge25" class="edge">
+<title>Node25&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M2356.4508,-128.9042C2058.1931,-109.9914 819.4114,-31.4394 601.0089,-17.5904"/>
+<polygon fill="#191970" stroke="#191970" points="601.0312,-14.0848 590.8297,-16.9449 600.5881,-21.0708 601.0312,-14.0848"/>
 </g>
-<!-- Node54 -->
-<g id="node36" class="node">
-<title>Node54</title>
-<g id="a_node36"><a xlink:href="buffer_8h.html" target="_top" xlink:title="Symbolic n&#45;dimensional array, to represent a memory buffer. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1388.5,-716.5 1388.5,-735.5 1477.5,-735.5 1477.5,-716.5 1388.5,-716.5"/>
-<text text-anchor="middle" x="1433" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/buffer.h</text>
-</a>
+<!-- Node25&#45;&gt;Node10 -->
+<g id="edge17" class="edge">
+<title>Node25&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2485.553,-127.7568C2615.1537,-117.2232 2906.1656,-93.5706 3040.1238,-82.6829"/>
+<polygon fill="#191970" stroke="#191970" points="3040.5845,-86.1571 3050.268,-81.8584 3040.0173,-79.1801 3040.5845,-86.1571"/>
 </g>
+<!-- Node25&#45;&gt;Node17 -->
+<g id="edge24" class="edge">
+<title>Node25&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M2462.4277,-123.484C2557.3174,-101.6649 2789.9004,-48.1845 2887.6387,-25.7104"/>
+<polygon fill="#191970" stroke="#191970" points="2888.5353,-29.0957 2897.4966,-23.4437 2886.9666,-22.2737 2888.5353,-29.0957"/>
 </g>
-<!-- Node47&#45;&gt;Node54 -->
-<g id="edge110" class="edge">
-<title>Node47&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1464.7373,-884.2988C1476.7584,-876.2823 1492.023,-863.6887 1499,-848 1502.6119,-839.878 1501.5175,-836.5249 1499,-828 1489.217,-794.8727 1464.9415,-762.3878 1448.719,-743.28"/>
-<polygon fill="#191970" stroke="#191970" points="1451.3314,-740.9505 1442.1191,-735.7173 1446.0572,-745.5531 1451.3314,-740.9505"/>
+<!-- Node10&#45;&gt;Node18 -->
+<g id="edge22" class="edge">
+<title>Node10&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M3050.3336,-75.5554C2695.3398,-66.9669 868.7947,-22.7762 600.8688,-16.2942"/>
+<polygon fill="#191970" stroke="#191970" points="600.7156,-12.7895 590.6338,-16.0465 600.5462,-19.7875 600.7156,-12.7895"/>
 </g>
-<!-- Node56 -->
-<g id="node37" class="node">
-<title>Node56</title>
-<g id="a_node37"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1280.5,-772.5 1280.5,-791.5 1363.5,-791.5 1363.5,-772.5 1280.5,-772.5"/>
-<text text-anchor="middle" x="1322" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/expr.h</text>
-</a>
+<!-- Node10&#45;&gt;Node15 -->
+<g id="edge19" class="edge">
+<title>Node10&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M3110.0432,-67.3906C3110.0432,-58.8657 3110.0432,-46.1392 3110.0432,-35.4235"/>
+<polygon fill="#191970" stroke="#191970" points="3113.5433,-35.2448 3110.0432,-25.2449 3106.5433,-35.2449 3113.5433,-35.2448"/>
 </g>
+<!-- Node10&#45;&gt;Node11 -->
+<g id="edge18" class="edge">
+<title>Node10&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M3169.5796,-67.6595C3238.5163,-56.8442 3352.1647,-39.0141 3427.0675,-27.2628"/>
+<polygon fill="#191970" stroke="#191970" points="3428.0341,-30.654 3437.3708,-25.6463 3426.9491,-23.7386 3428.0341,-30.654"/>
 </g>
-<!-- Node47&#45;&gt;Node56 -->
-<g id="edge115" class="edge">
-<title>Node47&#45;&gt;Node56</title>
-<path fill="none" stroke="#191970" d="M1437.2573,-884.4509C1415.6841,-865.2747 1366.8833,-821.8962 1340.2237,-798.1988"/>
-<polygon fill="#191970" stroke="#191970" points="1342.5149,-795.5526 1332.7155,-791.5249 1337.8643,-800.7845 1342.5149,-795.5526"/>
+<!-- Node10&#45;&gt;Node16 -->
+<g id="edge20" class="edge">
+<title>Node10&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M3050.3242,-73.9292C2836.0005,-62.9083 2109.6491,-25.5583 1946.6787,-17.1782"/>
+<polygon fill="#191970" stroke="#191970" points="1946.5169,-13.6653 1936.3503,-16.6471 1946.1573,-20.6561 1946.5169,-13.6653"/>
 </g>
-<!-- Node57 -->
-<g id="node38" class="node">
-<title>Node57</title>
-<g id="a_node38"><a xlink:href="stmt_8h.html" target="_top" xlink:title="TIR statements. ">
-<polygon fill="#ffffff" stroke="#000000" points="1406,-828.5 1406,-847.5 1490,-847.5 1490,-828.5 1406,-828.5"/>
-<text text-anchor="middle" x="1448" y="-835.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/stmt.h</text>
-</a>
+<!-- Node10&#45;&gt;Node17 -->
+<g id="edge21" class="edge">
+<title>Node10&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M3082.2307,-67.3906C3051.795,-56.8749 3002.8555,-39.966 2969.1607,-28.3243"/>
+<polygon fill="#191970" stroke="#191970" points="2970.2552,-24.9995 2959.6604,-25.0419 2967.9692,-31.6157 2970.2552,-24.9995"/>
 </g>
+<!-- Node19 -->
+<g id="node22" class="node">
+<title>Node19</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2678.0432,-6 2678.0432,-25 2728.0432,-25 2728.0432,-6 2678.0432,-6"/>
+<text text-anchor="middle" x="2703.0432" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
 </g>
-<!-- Node47&#45;&gt;Node57 -->
-<g id="edge129" class="edge">
-<title>Node47&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1448,-884.2455C1448,-876.9382 1448,-866.6944 1448,-857.7046"/>
-<polygon fill="#191970" stroke="#191970" points="1451.5001,-857.6426 1448,-847.6427 1444.5001,-857.6427 1451.5001,-857.6426"/>
+<!-- Node10&#45;&gt;Node19 -->
+<g id="edge23" class="edge">
+<title>Node10&#45;&gt;Node19</title>
+<path fill="none" stroke="#191970" d="M3050.4261,-67.9915C2964.3195,-54.9803 2807.8476,-31.3365 2738.1576,-20.806"/>
+<polygon fill="#191970" stroke="#191970" points="2738.5431,-17.3246 2728.1324,-19.2911 2737.4972,-24.246 2738.5431,-17.3246"/>
 </g>
-<!-- Node48&#45;&gt;Node19 -->
-<g id="edge100" class="edge">
-<title>Node48&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M1237.0964,-716.4485C1264.9675,-673.1991 1384.3073,-492.484 1510,-369 1521.0322,-358.1616 1534.3393,-347.5169 1546.1771,-338.7442"/>
-<polygon fill="#191970" stroke="#191970" points="1548.4494,-341.4196 1554.4837,-332.7111 1544.3357,-335.7558 1548.4494,-341.4196"/>
+<!-- Node28&#45;&gt;Node15 -->
+<g id="edge32" class="edge">
+<title>Node28&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M3218.1069,-240.7641C3226.6317,-211.6858 3247.7423,-123.9672 3212.0432,-67 3200.4107,-48.4372 3179.8341,-36.3062 3160.1943,-28.5121"/>
+<polygon fill="#191970" stroke="#191970" points="3161.2653,-25.1771 3150.6715,-25.0252 3158.8583,-31.7503 3161.2653,-25.1771"/>
 </g>
-<!-- Node48&#45;&gt;Node14 -->
-<g id="edge107" class="edge">
-<title>Node48&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1280.3322,-722.6476C1532.4152,-705.5035 2665.5486,-628.2678 2683,-624 2784.3803,-599.207 3087,-488.8679 3087,-384.5 3087,-384.5 3087,-384.5 3087,-133 3087,-88.262 3044.839,-50.9849 3016.6909,-31.0828"/>
-<polygon fill="#191970" stroke="#191970" points="3018.3512,-27.979 3008.1155,-25.2441 3014.4116,-33.7652 3018.3512,-27.979"/>
+<!-- Node28&#45;&gt;Node11 -->
+<g id="edge31" class="edge">
+<title>Node28&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M3253.1473,-240.905C3302.0443,-226.8533 3386.8849,-196.3713 3440.0432,-143 3468.9991,-113.9281 3486.5343,-68.6533 3495.2443,-40.6055"/>
+<polygon fill="#191970" stroke="#191970" points="3498.6736,-41.3512 3498.1558,-30.769 3491.9615,-39.3644 3498.6736,-41.3512"/>
 </g>
-<!-- Node48&#45;&gt;Node15 -->
-<g id="edge108" class="edge">
-<title>Node48&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1206.4778,-716.4595C1075.8788,-665.4773 468,-425.9522 468,-384.5 468,-384.5 468,-384.5 468,-317.5 468,-250.0582 475.9232,-219.3001 530,-179 679.3307,-67.7131 906.6125,-30.7862 1002.1856,-19.7509"/>
-<polygon fill="#191970" stroke="#191970" points="1002.8167,-23.2023 1012.3676,-18.6164 1002.0415,-16.2454 1002.8167,-23.2023"/>
+<!-- Node28&#45;&gt;Node16 -->
+<g id="edge33" class="edge">
+<title>Node28&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M3193.7014,-240.8757C3122.5897,-209.3129 2886.521,-108.5916 2680.0432,-67 2536.2594,-38.037 2073.55,-20.8099 1946.5518,-16.5463"/>
+<polygon fill="#191970" stroke="#191970" points="1946.4299,-13.0404 1936.3192,-16.2066 1946.1975,-20.0366 1946.4299,-13.0404"/>
 </g>
-<!-- Node48&#45;&gt;Node31 -->
-<g id="edge106" class="edge">
-<title>Node48&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1228.1906,-716.4187C1214.0195,-665.909 1155.3435,-428.1108 1267,-302 1279.6896,-287.6677 1297.0947,-277.1612 1314.6402,-269.5247"/>
-<polygon fill="#191970" stroke="#191970" points="1316.4253,-272.5774 1324.373,-265.5714 1313.791,-266.092 1316.4253,-272.5774"/>
+<!-- Node28&#45;&gt;Node17 -->
+<g id="edge34" class="edge">
+<title>Node28&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M3216.4515,-240.8707C3220.2939,-210.5558 3227.6498,-116.1851 3179.0432,-67 3164.9123,-52.7008 3044.0344,-32.4268 2976.7713,-22.1"/>
+<polygon fill="#191970" stroke="#191970" points="2977.271,-18.6358 2966.858,-20.5904 2976.2171,-25.556 2977.271,-18.6358"/>
 </g>
-<!-- Node51 -->
-<g id="node31" class="node">
-<title>Node51</title>
-<g id="a_node31"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
-<polygon fill="#ffffff" stroke="#000000" points="1576.5,-660.5 1576.5,-679.5 1655.5,-679.5 1655.5,-660.5 1576.5,-660.5"/>
-<text text-anchor="middle" x="1616" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/expr.h</text>
-</a>
+<!-- Node29&#45;&gt;Node16 -->
+<g id="edge66" class="edge">
+<title>Node29&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2194.9652,-369.488C2164.3713,-359.3197 2129.2249,-345.5086 2119.0432,-333 2042.2158,-238.6146 2143.5609,-149.4479 2054.0432,-67 2023.7503,-39.0995 1977.0705,-25.9894 1946.0515,-20.0605"/>
+<polygon fill="#191970" stroke="#191970" points="1946.6355,-16.6096 1936.1787,-18.3142 1945.4162,-23.5026 1946.6355,-16.6096"/>
 </g>
+<!-- Node29&#45;&gt;Node28 -->
+<g id="edge37" class="edge">
+<title>Node29&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M2301.7338,-378.6014C2364.9903,-371.231 2470.0173,-356.6298 2558.0432,-333 2595.6729,-322.8987 2602.0229,-310.5135 2640.0432,-302 2837.5632,-257.7716 2892.7104,-286.8654 3094.0432,-266 3107.6043,-264.5946 3121.9965,-262.9159 3135.9443,-261.1901"/>
+<polygon fill="#191970" stroke="#191970" points="3136.517,-264.6458 3146.0037,-259.9285 3135.6458,-257.7002 3136.517,-264.6458"/>
 </g>
-<!-- Node48&#45;&gt;Node51 -->
-<g id="edge81" class="edge">
-<title>Node48&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1280.1516,-718.8507C1353.9173,-708.1211 1492.4717,-687.9678 1566.4626,-677.2054"/>
-<polygon fill="#191970" stroke="#191970" points="1567.069,-680.6542 1576.4611,-675.7511 1566.0614,-673.727 1567.069,-680.6542"/>
+<!-- Node29&#45;&gt;Node30 -->
+<g id="edge38" class="edge">
+<title>Node29&#45;&gt;Node30</title>
+<path fill="none" stroke="#191970" d="M2242.3203,-369.2967C2240.614,-359.7699 2238.3976,-347.3954 2236.5592,-337.1306"/>
+<polygon fill="#191970" stroke="#191970" points="2239.9628,-336.2813 2234.7546,-327.055 2233.0725,-337.5154 2239.9628,-336.2813"/>
 </g>
 <!-- Node40 -->
-<g id="node35" class="node">
+<g id="node32" class="node">
 <title>Node40</title>
-<g id="a_node35"><a xlink:href="map_8h.html" target="_top" xlink:title="Runtime Map container types. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="686,-235.5 686,-265.5 812,-265.5 812,-235.5 686,-235.5"/>
-<text text-anchor="start" x="694" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="749" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/map.h</text>
-</a>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2132.5432,-241 2132.5432,-260 2197.5432,-260 2197.5432,-241 2132.5432,-241"/>
+<text text-anchor="middle" x="2165.0432" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">functional</text>
 </g>
+<!-- Node29&#45;&gt;Node40 -->
+<g id="edge65" class="edge">
+<title>Node29&#45;&gt;Node40</title>
+<path fill="none" stroke="#191970" d="M2188.5237,-373.7075C2165.7173,-366.2773 2141.6251,-353.8177 2128.0432,-333 2114.3331,-311.9856 2132.1688,-285.0499 2147.5016,-267.78"/>
+<polygon fill="#191970" stroke="#191970" points="2150.3784,-269.8299 2154.6784,-260.1468 2145.2785,-265.0349 2150.3784,-269.8299"/>
 </g>
-<!-- Node48&#45;&gt;Node40 -->
-<g id="edge101" class="edge">
-<title>Node48&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M1223.0746,-716.4116C1173.5605,-656.5738 907.3591,-335.852 862,-302 844.182,-288.7022 822.3824,-277.7609 802.7004,-269.4354"/>
-<polygon fill="#191970" stroke="#191970" points="803.8157,-266.1101 793.2352,-265.5596 801.1631,-272.588 803.8157,-266.1101"/>
+<!-- Node30&#45;&gt;Node18 -->
+<g id="edge63" class="edge">
+<title>Node30&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M2170.2256,-316.4282C1939.4913,-312.2673 1146.4918,-295.914 1037.0432,-266 988.6679,-252.7783 684.4723,-81.5 593.6284,-30.0299"/>
+<polygon fill="#191970" stroke="#191970" points="595.2314,-26.9154 584.8061,-25.0279 591.7789,-33.0048 595.2314,-26.9154"/>
 </g>
-<!-- Node51&#45;&gt;Node3 -->
-<g id="edge93" class="edge">
-<title>Node51&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1630.9727,-660.3477C1668.0939,-636.9119 1768.0294,-576.8184 1860,-548 1947.4131,-520.6097 2053.5309,-509.4313 2118.3882,-504.9365"/>
-<polygon fill="#191970" stroke="#191970" points="2118.6872,-508.4245 2128.434,-504.2711 2118.2245,-501.4398 2118.6872,-508.4245"/>
+<!-- Node30&#45;&gt;Node20 -->
+<g id="edge64" class="edge">
+<title>Node30&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M2233.1922,-307.9993C2232.8987,-290.446 2229.5042,-253.0795 2207.0432,-235 2139.9947,-181.0306 1911.3293,-213.6548 1808.7965,-198.5303"/>
+<polygon fill="#191970" stroke="#191970" points="1809.1382,-195.0378 1798.6905,-196.7976 1807.9552,-201.9372 1809.1382,-195.0378"/>
 </g>
-<!-- Node51&#45;&gt;Node20 -->
-<g id="edge96" class="edge">
-<title>Node51&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1610.817,-660.0518C1593.0233,-625.357 1534.1934,-506.0486 1510,-400 1500.2997,-357.4802 1488.508,-342.3738 1505,-302 1529.2448,-242.6467 1581.7552,-258.3533 1606,-199 1612.4685,-183.1645 1605.0468,-164.6505 1597.1238,-151.2681"/>
-<polygon fill="#191970" stroke="#191970" points="1599.8392,-149.023 1591.4576,-142.5423 1593.9684,-152.8353 1599.8392,-149.023"/>
+<!-- Node30&#45;&gt;Node10 -->
+<g id="edge60" class="edge">
+<title>Node30&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2249.2756,-307.9652C2278.7621,-291.0165 2343.4953,-255.5034 2402.0432,-235 2629.1601,-155.4642 2911.6659,-106.6365 3040.3515,-86.9856"/>
+<polygon fill="#191970" stroke="#191970" points="3041.1011,-90.412 3050.4638,-85.4532 3040.0523,-83.4911 3041.1011,-90.412"/>
 </g>
-<!-- Node51&#45;&gt;Node8 -->
-<g id="edge95" class="edge">
-<title>Node51&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1622.4986,-660.4713C1644.4875,-628.3349 1718.3897,-521.1854 1784,-436 1820.2169,-388.9776 1841.798,-385.7524 1869,-333 1910.9521,-251.643 1927.001,-141.5051 1932.0351,-96.9411"/>
-<polygon fill="#191970" stroke="#191970" points="1935.5304,-97.1708 1933.1092,-86.8563 1928.5697,-96.4294 1935.5304,-97.1708"/>
+<!-- Node30&#45;&gt;Node11 -->
+<g id="edge39" class="edge">
+<title>Node30&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M2273.1178,-307.963C2453.5802,-265.016 3192.2445,-89.2267 3428.799,-32.9309"/>
+<polygon fill="#191970" stroke="#191970" points="3429.7603,-36.2999 3438.6783,-30.5798 3428.1397,-29.4901 3429.7603,-36.2999"/>
 </g>
-<!-- Node51&#45;&gt;Node14 -->
-<g id="edge98" class="edge">
-<title>Node51&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1655.6499,-669.099C1840.2043,-664.7966 2608.0889,-645.6542 2654,-624 2725.4399,-590.305 2712.8034,-543.1409 2773,-492 2795.5492,-472.843 2804.9712,-473.2648 2829,-456 2861.3558,-432.7522 2870.9051,-428.2495 2899,-400 2925.7738,-373.0789 2929.1724,-363.3394 2952,-333 2979.1024,-296.979 3011,-295.5783 3011,-250.5 3011,-250.5 3011,-250.5 3011,-133 3011,-98.1936 3002.7747,-58.3226 2997.1479,-35.1197"/>
-<polygon fill="#191970" stroke="#191970" points="3000.477,-34.0065 2994.6419,-25.1633 2993.6887,-35.7152 3000.477,-34.0065"/>
+<!-- Node30&#45;&gt;Node19 -->
+<g id="edge61" class="edge">
+<title>Node30&#45;&gt;Node19</title>
+<path fill="none" stroke="#191970" d="M2247.8857,-307.963C2315.7155,-264.3787 2596.4683,-83.98 2679.6921,-30.5043"/>
+<polygon fill="#191970" stroke="#191970" points="2681.6763,-33.3897 2688.1972,-25.0394 2677.8922,-27.5006 2681.6763,-33.3897"/>
 </g>
-<!-- Node51&#45;&gt;Node15 -->
-<g id="edge99" class="edge">
-<title>Node51&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1591.7357,-660.3984C1484.0914,-617.4058 1050.7575,-440.0268 954,-333 891.3376,-263.687 911.4788,-210.597 944,-123 953.6168,-97.0968 952.9422,-87.9135 971,-67 984.2353,-51.6716 1003.055,-38.8722 1018.655,-29.8675"/>
-<polygon fill="#191970" stroke="#191970" points="1020.3559,-32.9265 1027.4014,-25.0138 1016.9592,-26.8058 1020.3559,-32.9265"/>
+<!-- Node30&#45;&gt;Node28 -->
+<g id="edge59" class="edge">
+<title>Node30&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M2295.6392,-310.3467C2321.9836,-307.4915 2352.9801,-304.3349 2381.0432,-302 2657.7553,-278.977 2986.9108,-261.5918 3135.3986,-254.2844"/>
+<polygon fill="#191970" stroke="#191970" points="3135.9577,-257.7613 3145.7743,-253.7757 3135.6149,-250.7697 3135.9577,-257.7613"/>
 </g>
-<!-- Node51&#45;&gt;Node31 -->
-<g id="edge94" class="edge">
-<title>Node51&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1604.6553,-660.4419C1567.4883,-628.3277 1449.1901,-519.5839 1404,-400 1388.3494,-358.5847 1385.0875,-306.5995 1384.6744,-276.1139"/>
-<polygon fill="#191970" stroke="#191970" points="1388.1737,-275.9003 1384.6335,-265.9144 1381.1737,-275.9284 1388.1737,-275.9003"/>
+<!-- Node30&#45;&gt;Node31 -->
+<g id="edge40" class="edge">
+<title>Node30&#45;&gt;Node31</title>
+<path fill="none" stroke="#191970" d="M2170.3764,-316.6712C1971.6171,-313.7331 1337.5799,-301.9816 814.0432,-266 788.0579,-264.2141 759.6581,-261.5383 734.4578,-258.9103"/>
+<polygon fill="#191970" stroke="#191970" points="734.5513,-255.4007 724.2386,-257.8292 733.8148,-262.3619 734.5513,-255.4007"/>
 </g>
-<!-- Node52 -->
-<g id="node32" class="node">
-<title>Node52</title>
-<g id="a_node32"><a xlink:href="ir_2span_8h.html" target="_top" xlink:title="Span information for debugging purposes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2411.5,-548.5 2411.5,-567.5 2492.5,-567.5 2492.5,-548.5 2411.5,-548.5"/>
-<text text-anchor="middle" x="2452" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/span.h</text>
-</a>
+<!-- Node30&#45;&gt;Node33 -->
+<g id="edge43" class="edge">
+<title>Node30&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M2170.1934,-312.8419C2007.86,-300.8105 1575.1972,-268.7437 1402.1078,-255.9152"/>
+<polygon fill="#191970" stroke="#191970" points="1402.3084,-252.4205 1392.077,-255.1718 1401.791,-259.4014 1402.3084,-252.4205"/>
 </g>
+<!-- Node30&#45;&gt;Node40 -->
+<g id="edge62" class="edge">
+<title>Node30&#45;&gt;Node40</title>
+<path fill="none" stroke="#191970" d="M2223.3005,-307.9005C2212.5955,-297.353 2195.1963,-280.2096 2182.1953,-267.3998"/>
+<polygon fill="#191970" stroke="#191970" points="2184.3477,-264.607 2174.7679,-260.0817 2179.4347,-269.5933 2184.3477,-264.607"/>
 </g>
-<!-- Node51&#45;&gt;Node52 -->
-<g id="edge82" class="edge">
-<title>Node51&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M1655.7596,-664.6734C1793.1087,-646.2725 2247.5498,-585.3905 2400.9274,-564.8423"/>
-<polygon fill="#191970" stroke="#191970" points="2401.5689,-568.2877 2411.0155,-563.4907 2400.6393,-561.3497 2401.5689,-568.2877"/>
+<!-- Node31&#45;&gt;Node18 -->
+<g id="edge41" class="edge">
+<title>Node31&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M655.0842,-235.4422C638.828,-194.3648 593.7264,-80.3983 575.6655,-34.7606"/>
+<polygon fill="#191970" stroke="#191970" points="578.8062,-33.185 571.8719,-25.1746 572.2973,-35.7609 578.8062,-33.185"/>
 </g>
-<!-- Node53 -->
-<g id="node33" class="node">
-<title>Node53</title>
-<g id="a_node33"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
-<polygon fill="#ffffff" stroke="#000000" points="2245,-604.5 2245,-623.5 2325,-623.5 2325,-604.5 2245,-604.5"/>
-<text text-anchor="middle" x="2285" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/type.h</text>
-</a>
+<!-- Node31&#45;&gt;Node24 -->
+<g id="edge42" class="edge">
+<title>Node31&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M724.0883,-243.1427C834.8598,-230.2159 1061.1341,-203.8101 1150.5172,-193.3792"/>
+<polygon fill="#191970" stroke="#191970" points="1150.948,-196.8528 1160.4748,-192.2172 1150.1365,-189.9 1150.948,-196.8528"/>
+</g>
+<!-- Node33&#45;&gt;Node22 -->
+<g id="edge49" class="edge">
+<title>Node33&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M1315.315,-235.4445C1303.3251,-220.383 1289.5023,-196.9378 1301.0432,-179 1308.291,-167.735 1337.4605,-155.137 1362.6006,-145.9771"/>
+<polygon fill="#191970" stroke="#191970" points="1364.0843,-149.1645 1372.3384,-142.5223 1361.7437,-142.5674 1364.0843,-149.1645"/>
+</g>
+<!-- Node33&#45;&gt;Node23 -->
+<g id="edge53" class="edge">
+<title>Node33&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M1392.0921,-246.6046C1481.9565,-240.4126 1652.1089,-226.322 1806.6028,-198.8215"/>
+<polygon fill="#191970" stroke="#191970" points="1807.5353,-202.2099 1816.7554,-196.9907 1806.293,-195.321 1807.5353,-202.2099"/>
+</g>
+<!-- Node33&#45;&gt;Node18 -->
+<g id="edge56" class="edge">
+<title>Node33&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1266.6056,-235.4328C1228.3773,-225.8354 1178.5981,-212.6702 1135.0432,-199 971.2523,-147.5923 936.1446,-117.4081 772.0432,-67 712.1368,-48.5981 640.9021,-31.7307 600.4293,-22.606"/>
+<polygon fill="#191970" stroke="#191970" points="601.1101,-19.1718 590.5871,-20.4032 599.5812,-26.0028 601.1101,-19.1718"/>
+</g>
+<!-- Node33&#45;&gt;Node20 -->
+<g id="edge58" class="edge">
+<title>Node33&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1392.4074,-243.5361C1489.9977,-232.6351 1672.5595,-211.498 1737.0432,-199 1738.5083,-198.7161 1740.0004,-198.4076 1741.5042,-198.081"/>
+<polygon fill="#191970" stroke="#191970" points="1742.514,-201.4387 1751.4374,-195.7269 1740.8998,-194.6273 1742.514,-201.4387"/>
 </g>
+<!-- Node33&#45;&gt;Node24 -->
+<g id="edge45" class="edge">
+<title>Node33&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M1294.5508,-235.4554C1271.5405,-225.419 1241.5347,-212.3314 1219.2378,-202.6061"/>
+<polygon fill="#191970" stroke="#191970" points="1220.4586,-199.3202 1209.8932,-198.5303 1217.66,-205.7364 1220.4586,-199.3202"/>
 </g>
-<!-- Node51&#45;&gt;Node53 -->
-<g id="edge86" class="edge">
-<title>Node51&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1655.8455,-666.6646C1771.4432,-656.9883 2106.7883,-628.9176 2234.6585,-618.2139"/>
-<polygon fill="#191970" stroke="#191970" points="2235.2682,-621.6752 2244.9413,-617.3532 2234.6842,-614.6996 2235.2682,-621.6752"/>
+<!-- Node33&#45;&gt;Node9 -->
+<g id="edge44" class="edge">
+<title>Node33&#45;&gt;Node9</title>
+<path fill="none" stroke="#191970" d="M1266.2782,-235.381C1218.5666,-223.386 1159.685,-207.3019 1151.0432,-199 1138.4109,-186.8645 1133.0307,-167.2962 1130.74,-152.6118"/>
+<polygon fill="#191970" stroke="#191970" points="1134.1971,-152.0459 1129.5215,-142.5386 1127.2478,-152.8866 1134.1971,-152.0459"/>
 </g>
-<!-- Node43 -->
-<g id="node34" class="node">
-<title>Node43</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1396,-604.5 1396,-623.5 1440,-623.5 1440,-604.5 1396,-604.5"/>
-<text text-anchor="middle" x="1418" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">limits</text>
+<!-- Node33&#45;&gt;Node15 -->
+<g id="edge46" class="edge">
+<title>Node33&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M1392.2231,-247.1811C1567.8622,-237.4153 2076.5843,-205.4644 2495.0432,-143 2707.9597,-111.2174 2958.2418,-52.6265 3061.6271,-27.4785"/>
+<polygon fill="#191970" stroke="#191970" points="3062.5742,-30.8502 3071.4597,-25.0797 3060.9151,-24.0496 3062.5742,-30.8502"/>
 </g>
-<!-- Node51&#45;&gt;Node43 -->
-<g id="edge97" class="edge">
-<title>Node51&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M1581.9626,-660.3733C1545.0297,-649.9276 1486.5005,-633.3739 1450.2732,-623.1278"/>
-<polygon fill="#191970" stroke="#191970" points="1450.8143,-619.6436 1440.2392,-620.2899 1448.9091,-626.3793 1450.8143,-619.6436"/>
+<!-- Node33&#45;&gt;Node25 -->
+<g id="edge47" class="edge">
+<title>Node33&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M1392.2095,-236.1767C1467.314,-219.7143 1596.649,-193.1515 1709.0432,-179 1940.2823,-149.8848 2216.2277,-138.7188 2346.2491,-134.8453"/>
+<polygon fill="#191970" stroke="#191970" points="2346.5941,-138.3368 2356.4877,-134.5464 2346.3898,-131.3398 2346.5941,-138.3368"/>
 </g>
-<!-- Node52&#45;&gt;Node3 -->
-<g id="edge83" class="edge">
-<title>Node52&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M2411.2753,-549.6767C2364.498,-540.1164 2287.4416,-524.3676 2234.7876,-513.6062"/>
-<polygon fill="#191970" stroke="#191970" points="2235.4242,-510.1641 2224.9258,-511.5907 2234.0225,-517.0223 2235.4242,-510.1641"/>
+<!-- Node33&#45;&gt;Node10 -->
+<g id="edge48" class="edge">
+<title>Node33&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1392.1492,-241.3345C1473.3966,-229.4206 1610.067,-208.9639 1660.0432,-199 1696.832,-191.6654 1704.916,-184.3647 1742.0432,-179 2073.6483,-131.0847 2160.9959,-168.8869 2495.0432,-143 2693.2827,-127.6375 2926.488,-99.9704 3040.3705,-85.8351"/>
+<polygon fill="#191970" stroke="#191970" points="3040.8603,-89.3012 3050.3514,-84.5929 3039.9957,-82.3548 3040.8603,-89.3012"/>
 </g>
-<!-- Node52&#45;&gt;Node8 -->
-<g id="edge84" class="edge">
-<title>Node52&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M2455.9417,-548.4697C2470.451,-511.7036 2516.9187,-377.3954 2455,-302 2416.9508,-255.6693 2379.0516,-291.9364 2325,-266 2303.7591,-255.8076 2301.8927,-247.6231 2282,-235 2195.324,-179.9986 2174.3197,-163.5175 2080,-123 2047.8914,-109.2069 2010.2473,-97.4469 1981.226,-89.2705"/>
-<polygon fill="#191970" stroke="#191970" points="1981.9153,-85.8296 1971.3437,-86.5304 1980.0449,-92.5751 1981.9153,-85.8296"/>
+<!-- Node33&#45;&gt;Node16 -->
+<g id="edge54" class="edge">
+<title>Node33&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1368.4747,-235.426C1390.5629,-226.2413 1418.1784,-213.4964 1441.0432,-199 1518.0362,-150.186 1515.1917,-105.0307 1598.0432,-67 1648.3761,-43.8961 1811.8174,-25.5562 1881.7828,-18.5625"/>
+<polygon fill="#191970" stroke="#191970" points="1882.1824,-22.0401 1891.7908,-17.576 1881.4957,-15.0739 1882.1824,-22.0401"/>
 </g>
-<!-- Node52&#45;&gt;Node14 -->
-<g id="edge85" class="edge">
-<title>Node52&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2471.1221,-548.4374C2527.4887,-520.1055 2692.0358,-436.3246 2740,-400 2780.9637,-368.9771 2821,-368.8853 2821,-317.5 2821,-317.5 2821,-317.5 2821,-133 2821,-101.7875 2822.7996,-88.9401 2845,-67 2876.2399,-36.1264 2927.0122,-23.6981 2959.9566,-18.7374"/>
-<polygon fill="#191970" stroke="#191970" points="2960.5415,-22.1902 2969.979,-17.3753 2959.5987,-15.2539 2960.5415,-22.1902"/>
+<!-- Node33&#45;&gt;Node17 -->
+<g id="edge57" class="edge">
+<title>Node33&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1374.6521,-235.4484C1424.4135,-219.5001 1506.6542,-194.522 1579.0432,-179 1918.4708,-106.2183 2007.2423,-107.2558 2352.0432,-67 2455.8455,-54.881 2770.313,-28.7441 2887.445,-19.1379"/>
+<polygon fill="#191970" stroke="#191970" points="2887.8192,-22.6191 2897.5,-18.3142 2887.2476,-15.6424 2887.8192,-22.6191"/>
 </g>
-<!-- Node53&#45;&gt;Node3 -->
-<g id="edge88" class="edge">
-<title>Node53&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M2275.8772,-604.4509C2257.7151,-585.4401 2216.828,-542.6423 2194.0653,-518.816"/>
-<polygon fill="#191970" stroke="#191970" points="2196.5383,-516.3378 2187.0997,-511.5249 2191.4768,-521.1733 2196.5383,-516.3378"/>
+<!-- Node33&#45;&gt;Node27 -->
+<g id="edge52" class="edge">
+<title>Node33&#45;&gt;Node27</title>
+<path fill="none" stroke="#191970" d="M1301.9375,-235.329C1289.1217,-226.5469 1275.1077,-214.2098 1268.0432,-199 1259.692,-181.0198 1274.5193,-162.0862 1288.8079,-149.132"/>
+<polygon fill="#191970" stroke="#191970" points="1291.1634,-151.7238 1296.5399,-142.5945 1286.6438,-146.3784 1291.1634,-151.7238"/>
 </g>
-<!-- Node53&#45;&gt;Node19 -->
-<g id="edge89" class="edge">
-<title>Node53&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M2244.8788,-611.8355C2160.9773,-606.8644 1969.8058,-593.0867 1910,-568 1779.0404,-513.0665 1776.0902,-450.7996 1660,-369 1644.6169,-358.1607 1626.9746,-347.0418 1611.9221,-337.9223"/>
-<polygon fill="#191970" stroke="#191970" points="1613.303,-334.669 1602.9278,-332.5228 1609.7001,-340.6707 1613.303,-334.669"/>
+<!-- Node34 -->
+<g id="node29" class="node">
+<title>Node34</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1310.5432,-179.5 1310.5432,-198.5 1363.5432,-198.5 1363.5432,-179.5 1310.5432,-179.5"/>
+<text text-anchor="middle" x="1337.0432" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstddef</text>
 </g>
-<!-- Node53&#45;&gt;Node8 -->
-<g id="edge91" class="edge">
-<title>Node53&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M2244.7579,-607.6236C2174.2779,-594.0465 2030.1878,-555.004 1969,-456 1891.0905,-329.9397 2046.8697,-258.1146 1986,-123 1980.4303,-110.6369 1969.7137,-100.2563 1959.4494,-92.5033"/>
-<polygon fill="#191970" stroke="#191970" points="1961.2549,-89.4948 1951.0567,-86.623 1957.2382,-95.2277 1961.2549,-89.4948"/>
+<!-- Node33&#45;&gt;Node34 -->
+<g id="edge50" class="edge">
+<title>Node33&#45;&gt;Node34</title>
+<path fill="none" stroke="#191970" d="M1331.0208,-235.2977C1332.0619,-227.2945 1333.351,-217.3843 1334.4691,-208.7889"/>
+<polygon fill="#191970" stroke="#191970" points="1337.9431,-209.2149 1335.7623,-198.8469 1331.0016,-208.3119 1337.9431,-209.2149"/>
 </g>
-<!-- Node53&#45;&gt;Node14 -->
-<g id="edge92" class="edge">
-<title>Node53&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2325.1047,-611.3515C2374.4538,-606.9918 2460.0487,-595.8097 2528,-568 2570.6862,-550.5303 2859,-363.6227 2859,-317.5 2859,-317.5 2859,-317.5 2859,-133 2859,-79.1753 2920.9347,-44.0942 2960.2752,-27.2401"/>
-<polygon fill="#191970" stroke="#191970" points="2961.822,-30.3884 2969.7388,-23.3475 2959.1591,-23.9147 2961.822,-30.3884"/>
+<!-- Node35 -->
+<g id="node30" class="node">
+<title>Node35</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1382.0432,-179.5 1382.0432,-198.5 1432.0432,-198.5 1432.0432,-179.5 1382.0432,-179.5"/>
+<text text-anchor="middle" x="1407.0432" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstring</text>
 </g>
-<!-- Node53&#45;&gt;Node26 -->
-<g id="edge90" class="edge">
-<title>Node53&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2325.3383,-609.2637C2379.9766,-602.2134 2473.9176,-587.65 2502,-568 2574.143,-517.5195 2582.726,-484.6384 2607,-400 2625.4196,-335.7745 2550.7981,-335.5197 2493,-302 2471.3841,-289.464 2446.7499,-275.3818 2428.7253,-265.1156"/>
-<polygon fill="#191970" stroke="#191970" points="2430.3753,-262.0276 2419.953,-260.1233 2426.913,-268.1114 2430.3753,-262.0276"/>
+<!-- Node33&#45;&gt;Node35 -->
+<g id="edge51" class="edge">
+<title>Node33&#45;&gt;Node35</title>
+<path fill="none" stroke="#191970" d="M1348.3241,-235.2977C1359.9408,-226.1384 1374.7251,-214.4816 1386.5525,-205.1562"/>
+<polygon fill="#191970" stroke="#191970" points="1388.8688,-207.787 1394.5545,-198.8469 1384.5347,-202.2901 1388.8688,-207.787"/>
 </g>
-<!-- Node53&#45;&gt;Node52 -->
-<g id="edge87" class="edge">
-<title>Node53&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M2313.7083,-604.3733C2341.3577,-595.1016 2383.3576,-581.0178 2413.8166,-570.804"/>
-<polygon fill="#191970" stroke="#191970" points="2415.2979,-573.9989 2423.6663,-567.5011 2413.0724,-567.3621 2415.2979,-573.9989"/>
+<!-- Node33&#45;&gt;Node36 -->
+<g id="edge55" class="edge">
+<title>Node33&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M1265.7191,-243.2904C1239.5322,-240.4594 1208.8471,-237.3358 1181.0432,-235 932.1893,-214.0939 634.6262,-198.0396 512.071,-191.8148"/>
+<polygon fill="#191970" stroke="#191970" points="511.9708,-188.3054 501.8067,-191.2957 511.6171,-195.2965 511.9708,-188.3054"/>
 </g>
-<!-- Node40&#45;&gt;Node20 -->
-<g id="edge102" class="edge">
-<title>Node40&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M812.2422,-247.8203C939.9738,-241.9108 1224.5172,-226.0897 1318,-199 1336.7099,-193.5782 1338.6154,-185.4391 1357,-179 1419.4491,-157.1275 1495.4068,-144.3676 1541.7039,-138.0426"/>
-<polygon fill="#191970" stroke="#191970" points="1542.2289,-141.5037 1551.6806,-136.7168 1541.3067,-134.5647 1542.2289,-141.5037"/>
+<!-- Node41&#45;&gt;Node21 -->
+<g id="edge74" class="edge">
+<title>Node41&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M2394.7346,-377.3866C2369.0951,-374.4524 2338.6047,-371.2252 2311.0432,-369 1972.1938,-341.643 1566.3383,-325.6853 1402.7348,-319.9453"/>
+<polygon fill="#191970" stroke="#191970" points="1402.428,-316.4326 1392.3121,-319.582 1402.1841,-323.4283 1402.428,-316.4326"/>
 </g>
-<!-- Node40&#45;&gt;Node16 -->
-<g id="edge104" class="edge">
-<title>Node40&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M733.209,-235.4235C718.039,-220.9452 694.4677,-198.4625 674,-179 630.9413,-138.0559 618.7295,-129.298 577,-87 559.3464,-69.1059 539.9174,-47.8579 526.7409,-33.2001"/>
-<polygon fill="#191970" stroke="#191970" points="529.043,-30.524 519.766,-25.4065 523.8268,-35.1923 529.043,-30.524"/>
+<!-- Node41&#45;&gt;Node23 -->
+<g id="edge97" class="edge">
+<title>Node41&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M2422.9725,-369.3615C2408.3537,-360.5881 2391.6866,-348.2488 2381.0432,-333 2355.1855,-295.9532 2388.7099,-263.9708 2354.0432,-235 2318.3318,-205.1561 1998.7183,-193.3074 1885.5456,-190.0239"/>
+<polygon fill="#191970" stroke="#191970" points="1885.3493,-186.517 1875.2543,-189.7326 1885.1512,-193.5142 1885.3493,-186.517"/>
 </g>
-<!-- Node40&#45;&gt;Node22 -->
-<g id="edge105" class="edge">
-<title>Node40&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M812.2352,-243.2036C924.1714,-230.2879 1153.8981,-203.781 1244.2798,-193.3523"/>
-<polygon fill="#191970" stroke="#191970" points="1244.8113,-196.8143 1254.3442,-192.1911 1244.0089,-189.8605 1244.8113,-196.8143"/>
+<!-- Node41&#45;&gt;Node18 -->
+<g id="edge101" class="edge">
+<title>Node41&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M2394.7652,-376.9656C2369.1312,-373.9551 2338.6367,-370.7847 2311.0432,-369 2262.2461,-365.8439 585.2648,-367.9287 551.0432,-333 469.7482,-250.0251 533.5929,-88.9236 558.894,-34.2067"/>
+<polygon fill="#191970" stroke="#191970" points="562.1363,-35.5379 563.2565,-25.0025 555.8108,-32.5399 562.1363,-35.5379"/>
 </g>
-<!-- Node40&#45;&gt;Node34 -->
-<g id="edge103" class="edge">
-<title>Node40&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M717.1984,-235.4554C696.1735,-225.509 668.8141,-212.5659 648.3157,-202.8686"/>
-<polygon fill="#191970" stroke="#191970" points="649.6816,-199.6429 639.1454,-198.5303 646.6881,-205.9706 649.6816,-199.6429"/>
+<!-- Node41&#45;&gt;Node20 -->
+<g id="edge102" class="edge">
+<title>Node41&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M2426.0087,-369.3338C2408.6382,-359.3852 2385.7238,-345.8785 2366.0432,-333 2303.7021,-292.2054 2299.9065,-260.8793 2230.0432,-235 2056.5444,-170.7312 1995.0103,-232.3095 1808.5293,-198.6548"/>
+<polygon fill="#191970" stroke="#191970" points="1809.1612,-195.2123 1798.6878,-196.8118 1807.8726,-202.0927 1809.1612,-195.2123"/>
 </g>
-<!-- Node54&#45;&gt;Node19 -->
-<g id="edge112" class="edge">
-<title>Node54&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M1424.7453,-716.3499C1408.3076,-695.9257 1374.1293,-646.6954 1387,-604 1404.573,-545.7062 1514.8184,-398.3528 1558.8586,-340.9236"/>
-<polygon fill="#191970" stroke="#191970" points="1561.7734,-342.8748 1565.0961,-332.8144 1556.2249,-338.6069 1561.7734,-342.8748"/>
+<!-- Node41&#45;&gt;Node15 -->
+<g id="edge82" class="edge">
+<title>Node41&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M2511.2257,-379.1012C2597.5055,-370.5817 2754.9062,-352.9253 2808.0432,-333 2831.8221,-324.0834 2832,-310.1768 2856.0432,-302 2948.2944,-270.6265 3223.8979,-334.6548 3293.0432,-266 3358.8752,-200.6351 3300.2748,-127.6131 3230.0432,-67 3209.0757,-48.9041 3180.7919,-36.3511 3156.9498,-28.1718"/>
+<polygon fill="#191970" stroke="#191970" points="3157.9714,-24.8235 3147.3787,-25.037 3155.7926,-31.4758 3157.9714,-24.8235"/>
 </g>
-<!-- Node54&#45;&gt;Node14 -->
-<g id="edge114" class="edge">
-<title>Node54&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1477.8092,-722.6472C1697.7678,-706.1515 2653.8443,-634.0006 2683,-624 2705.9853,-616.1159 3049,-408.7998 3049,-384.5 3049,-384.5 3049,-384.5 3049,-133 3049,-94.341 3023.7138,-55.0543 3006.825,-33.1259"/>
-<polygon fill="#191970" stroke="#191970" points="3009.3871,-30.7255 3000.4105,-25.0977 3003.9183,-35.095 3009.3871,-30.7255"/>
+<!-- Node41&#45;&gt;Node10 -->
+<g id="edge94" class="edge">
+<title>Node41&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2511.3344,-377.2098C2581.1973,-367.8728 2693.6559,-350.7641 2732.0432,-333 2751.8238,-323.8463 2752.4888,-314.9192 2770.0432,-302 2884.7972,-217.5469 3029.3014,-126.7937 3085.821,-91.8632"/>
+<polygon fill="#191970" stroke="#191970" points="3087.7474,-94.7873 3094.422,-86.5592 3084.0731,-88.8291 3087.7474,-94.7873"/>
 </g>
-<!-- Node54&#45;&gt;Node31 -->
-<g id="edge113" class="edge">
-<title>Node54&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1421.5931,-716.2685C1411.8054,-707.4998 1397.7737,-693.8868 1388,-680 1308.3737,-566.8647 1283.0114,-534.8267 1252,-400 1238.7775,-342.5132 1301.6092,-295.5143 1345.4206,-270.3817"/>
-<polygon fill="#191970" stroke="#191970" points="1347.1433,-273.4286 1354.1736,-265.5024 1343.7349,-267.3144 1347.1433,-273.4286"/>
+<!-- Node41&#45;&gt;Node11 -->
+<g id="edge73" class="edge">
+<title>Node41&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M2511.4019,-380.3739C2605.562,-373.1298 2786.4729,-356.6184 2846.0432,-333 2867.9444,-324.3166 2866.9466,-310.1731 2889.0432,-302 3068.0823,-235.7769 3144.0124,-346.6276 3317.0432,-266 3418.7392,-218.6125 3447.2455,-188.5039 3495.0432,-87 3501.8181,-72.6128 3503.5809,-54.821 3503.6437,-40.5846"/>
+<polygon fill="#191970" stroke="#191970" points="3507.141,-40.4244 3503.4151,-30.5063 3500.1428,-40.5831 3507.141,-40.4244"/>
 </g>
-<!-- Node54&#45;&gt;Node51 -->
-<g id="edge111" class="edge">
-<title>Node54&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1464.4588,-716.3733C1495.0207,-707.021 1541.5826,-692.7725 1575.0261,-682.5385"/>
-<polygon fill="#191970" stroke="#191970" points="1576.4136,-685.7742 1584.9517,-679.5011 1574.3652,-679.0806 1576.4136,-685.7742"/>
+<!-- Node41&#45;&gt;Node16 -->
+<g id="edge98" class="edge">
+<title>Node41&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2486.7765,-369.3642C2501.3434,-360.9147 2516.8909,-348.8543 2525.0432,-333 2545.5019,-293.2128 2530.3282,-269.6633 2502.0432,-235 2499.1383,-231.4399 2221.3588,-68.5777 2217.0432,-67 2122.6475,-32.4893 2003.1243,-20.8825 1946.5539,-17.1582"/>
+<polygon fill="#191970" stroke="#191970" points="1946.6211,-13.6558 1936.4243,-16.5324 1946.1894,-20.6425 1946.6211,-13.6558"/>
 </g>
-<!-- Node56&#45;&gt;Node3 -->
-<g id="edge117" class="edge">
-<title>Node56&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1363.5201,-773.8551C1404.028,-765.5686 1467.2285,-751.7339 1521,-736 1764.5532,-664.7345 2049.6699,-553.32 2145.0556,-515.2603"/>
-<polygon fill="#191970" stroke="#191970" points="2146.4736,-518.4628 2154.4594,-511.5002 2143.8747,-511.9631 2146.4736,-518.4628"/>
+<!-- Node41&#45;&gt;Node17 -->
+<g id="edge100" class="edge">
+<title>Node41&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M2511.2704,-378.0287C2574.3537,-370.0388 2669.9054,-354.7828 2699.0432,-333 2752.7949,-292.8165 2795.2206,-115.079 2842.0432,-67 2857.8726,-50.7459 2879.9293,-38.0139 2898.2452,-29.2755"/>
+<polygon fill="#191970" stroke="#191970" points="2899.7929,-32.4164 2907.4294,-25.0725 2896.8799,-26.0513 2899.7929,-32.4164"/>
 </g>
-<!-- Node56&#45;&gt;Node19 -->
-<g id="edge119" class="edge">
-<title>Node56&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M1280.2804,-775.5047C1217.3212,-763.3238 1107.2318,-731.5286 1126,-660 1163.6781,-516.4024 1168.9638,-454.966 1290,-369 1323.5543,-345.1681 1430.269,-330.8337 1503.6695,-323.5465"/>
-<polygon fill="#191970" stroke="#191970" points="1504.179,-327.0136 1513.7945,-322.5647 1503.5033,-320.0462 1504.179,-327.0136"/>
+<!-- Node41&#45;&gt;Node28 -->
+<g id="edge81" class="edge">
+<title>Node41&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M2511.2987,-382.1829C2576.3395,-377.9848 2683.7896,-366.1817 2770.0432,-333 2793.7455,-323.8818 2794.1979,-310.7372 2818.0432,-302 2819.9016,-301.3191 3020.3211,-275.5065 3135.6437,-260.6904"/>
+<polygon fill="#191970" stroke="#191970" points="3136.3725,-264.1257 3145.845,-259.38 3135.4806,-257.1827 3136.3725,-264.1257"/>
 </g>
-<!-- Node56&#45;&gt;Node20 -->
-<g id="edge124" class="edge">
-<title>Node56&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1280.4301,-776.2405C1233.0171,-768.8857 1159.4089,-754.8224 1139,-736 1058.7958,-662.0305 1123.2644,-592.3886 1166,-492 1191.6381,-431.7743 1196.2434,-410.3305 1247,-369 1327.6106,-303.3597 1367.319,-318.574 1457,-266 1502.2009,-239.5017 1520.3873,-239.193 1554,-199 1565.4531,-185.3047 1573.4245,-166.5238 1578.2563,-152.4974"/>
-<polygon fill="#191970" stroke="#191970" points="1581.6256,-153.4522 1581.3448,-142.8611 1574.9596,-151.3156 1581.6256,-153.4522"/>
+<!-- Node41&#45;&gt;Node30 -->
+<g id="edge93" class="edge">
+<title>Node41&#45;&gt;Node30</title>
+<path fill="none" stroke="#191970" d="M2403.671,-369.4639C2364.6467,-357.5792 2310.8174,-341.1858 2274.1233,-330.0108"/>
+<polygon fill="#191970" stroke="#191970" points="2274.8405,-326.5705 2264.2546,-327.0053 2272.8012,-333.2669 2274.8405,-326.5705"/>
 </g>
-<!-- Node56&#45;&gt;Node16 -->
-<g id="edge128" class="edge">
-<title>Node56&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1280.2299,-780.865C1163.7178,-777.3619 836.9571,-765.1458 734,-736 524.0011,-676.5521 435.7086,-680.5037 297,-512 277.6002,-488.4331 278,-476.5246 278,-446 278,-446 278,-446 278,-133 278,-44.3808 414.0378,-22.5872 478.2197,-17.2366"/>
-<polygon fill="#191970" stroke="#191970" points="478.5238,-20.7237 488.2347,-16.4871 478.0014,-13.7432 478.5238,-20.7237"/>
+<!-- Node41&#45;&gt;Node40 -->
+<g id="edge95" class="edge">
+<title>Node41&#45;&gt;Node40</title>
+<path fill="none" stroke="#191970" d="M2394.9161,-378.2169C2314.3348,-368.914 2176.8575,-350.6663 2162.0432,-333 2147.4934,-315.6491 2152.0453,-288.2918 2157.6631,-269.8878"/>
+<polygon fill="#191970" stroke="#191970" points="2161.0417,-270.8243 2160.9577,-260.2298 2154.4165,-268.5643 2161.0417,-270.8243"/>
 </g>
-<!-- Node56&#45;&gt;Node9 -->
-<g id="edge118" class="edge">
-<title>Node56&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1280.394,-781.3475C1170.5164,-779.1657 874.7995,-770.1938 784,-736 705.4698,-706.4266 686.7005,-687.6352 632,-624 612.3991,-601.1975 506,-414.5691 506,-384.5 506,-384.5 506,-384.5 506,-317.5 506,-250.9353 510.7069,-220.1861 563,-179 690.5524,-78.5397 1123.9437,-87.561 1285,-67 1440.5876,-47.1372 1623.5369,-30.3368 1723.4906,-21.7128"/>
-<polygon fill="#191970" stroke="#191970" points="1723.8272,-25.1969 1733.4908,-20.8534 1723.2278,-18.2226 1723.8272,-25.1969"/>
+<!-- Node41&#45;&gt;Node42 -->
+<g id="edge75" class="edge">
+<title>Node41&#45;&gt;Node42</title>
+<path fill="none" stroke="#191970" d="M2394.7563,-377.0981C2369.1207,-374.1116 2338.6274,-370.9233 2311.0432,-369 2148.8217,-357.6891 1000.0255,-327.3045 696.4625,-319.4005"/>
+<polygon fill="#191970" stroke="#191970" points="696.5364,-315.9013 686.4487,-319.1399 696.3542,-322.8989 696.5364,-315.9013"/>
 </g>
-<!-- Node56&#45;&gt;Node14 -->
-<g id="edge126" class="edge">
-<title>Node56&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1363.8146,-780.7329C1485.2973,-776.8237 1848.641,-763.5188 2149,-736 2344.4328,-718.0945 2394.0617,-715.9078 2587,-680 2689.5836,-660.9081 2715.8549,-656.5291 2815,-624 2963.1579,-575.39 3023.0299,-575.6648 3123,-456 3146.3451,-428.0557 3163,-420.9126 3163,-384.5 3163,-384.5 3163,-384.5 3163,-133 3163,-66.2473 3073.3907,-34.5655 3023.9084,-22.1594"/>
-<polygon fill="#191970" stroke="#191970" points="3024.5442,-18.713 3014.0052,-19.799 3022.9212,-25.5223 3024.5442,-18.713"/>
+<!-- Node43 -->
+<g id="node35" class="node">
+<title>Node43</title>
+<g id="a_node35"><a xlink:href="runtime_2module_8h.html" target="_top" xlink:title="Runtime container of the functions generated by TVM, This is used to support dynamically link...">
+<polygon fill="#ffffff" stroke="#ff0000" points="2390.0432,-308 2390.0432,-327 2516.0432,-327 2516.0432,-308 2390.0432,-308"/>
+<text text-anchor="middle" x="2453.0432" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/module.h</text>
+</a>
 </g>
-<!-- Node56&#45;&gt;Node26 -->
-<g id="edge122" class="edge">
-<title>Node56&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1363.7031,-779.9741C1585.0865,-768.8709 2616.7063,-712.6416 2725,-624 2772.2415,-585.3315 2788.2685,-547.5744 2763,-492 2702.8944,-359.8063 2528.8809,-289.6921 2446.8142,-263.1746"/>
-<polygon fill="#191970" stroke="#191970" points="2447.6302,-259.7619 2437.0402,-260.0827 2445.5189,-266.4359 2447.6302,-259.7619"/>
 </g>
-<!-- Node56&#45;&gt;Node31 -->
-<g id="edge121" class="edge">
-<title>Node56&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1280.2053,-779.5218C1185.8325,-773.5737 962.3778,-757.3811 936,-736 829.1485,-649.389 810.6168,-564.3745 860,-436 888.8098,-361.1073 909.5372,-338.4963 981,-302 1110.2117,-236.0112 1164.042,-287.2342 1311.5368,-266.0592"/>
-<polygon fill="#191970" stroke="#191970" points="1312.4977,-269.453 1321.8542,-264.4826 1311.4402,-262.5334 1312.4977,-269.453"/>
+<!-- Node41&#45;&gt;Node43 -->
+<g id="edge83" class="edge">
+<title>Node41&#45;&gt;Node43</title>
+<path fill="none" stroke="#191970" d="M2447.2824,-369.2967C2446.1344,-359.7699 2445.9857,-347.3954 2446.8362,-337.1306"/>
+<polygon fill="#191970" stroke="#191970" points="2450.3275,-337.4239 2448.1513,-327.055 2443.3864,-336.5178 2450.3275,-337.4239"/>
 </g>
-<!-- Node56&#45;&gt;Node34 -->
-<g id="edge127" class="edge">
-<title>Node56&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M1280.4716,-781.4542C1184.1792,-779.5926 948.5911,-771.4421 879,-736 789.5396,-690.4386 619.1107,-491.1356 577,-400 558.6536,-360.2949 558.7976,-344.7598 568,-302 575.5791,-266.7829 595.0468,-229.315 607.6186,-207.5921"/>
-<polygon fill="#191970" stroke="#191970" points="610.7518,-209.1704 612.8421,-198.7839 604.7308,-205.5999 610.7518,-209.1704"/>
+<!-- Node41&#45;&gt;Node45 -->
+<g id="edge96" class="edge">
+<title>Node41&#45;&gt;Node45</title>
+<path fill="none" stroke="#191970" d="M2511.2095,-382.8343C2594.9633,-379.255 2753.7807,-368.036 2884.0432,-333 2886.3719,-332.3737 2888.7457,-331.6237 2891.1075,-330.7964"/>
+<polygon fill="#191970" stroke="#191970" points="2892.5674,-333.9836 2900.6027,-327.0781 2890.0149,-327.4655 2892.5674,-333.9836"/>
 </g>
-<!-- Node56&#45;&gt;Node51 -->
-<g id="edge116" class="edge">
-<title>Node56&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1363.5062,-772.7505C1397.1076,-764.6965 1445.3601,-751.8386 1486,-736 1523.8077,-721.2652 1565.2433,-699.1028 1590.9367,-684.5938"/>
-<polygon fill="#191970" stroke="#191970" points="1592.7014,-687.6166 1599.6545,-679.6225 1589.2338,-681.5357 1592.7014,-687.6166"/>
+<!-- Node46 -->
+<g id="node37" class="node">
+<title>Node46</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2648.5432,-308 2648.5432,-327 2689.5432,-327 2689.5432,-308 2648.5432,-308"/>
+<text text-anchor="middle" x="2669.0432" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tuple</text>
 </g>
-<!-- Node56&#45;&gt;Node43 -->
-<g id="edge125" class="edge">
-<title>Node56&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M1327.4364,-772.4862C1342.8089,-745.5845 1386.8003,-668.5994 1407.2758,-632.7673"/>
-<polygon fill="#191970" stroke="#191970" points="1410.4615,-634.2468 1412.3841,-623.8279 1404.3838,-630.7738 1410.4615,-634.2468"/>
+<!-- Node41&#45;&gt;Node46 -->
+<g id="edge99" class="edge">
+<title>Node41&#45;&gt;Node46</title>
+<path fill="none" stroke="#191970" d="M2511.1661,-370.1742C2546.9888,-360.8017 2593.6425,-347.6126 2634.0432,-333 2635.8712,-332.3389 2637.7383,-331.6264 2639.6119,-330.8831"/>
+<polygon fill="#191970" stroke="#191970" points="2641.008,-334.093 2648.8808,-327.0029 2638.3049,-327.636 2641.008,-334.093"/>
 </g>
-<!-- Node56&#45;&gt;Node40 -->
-<g id="edge120" class="edge">
-<title>Node56&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M1280.2947,-781.1357C1188.8327,-778.6373 973.7067,-769.2943 910,-736 798.183,-677.5624 729.6341,-382.7875 720,-333 717.3825,-319.4731 716.5056,-315.3273 720,-302 722.5228,-292.3784 727.2931,-282.6592 732.2472,-274.2958"/>
-<polygon fill="#191970" stroke="#191970" points="735.2183,-276.1459 737.5974,-265.8216 729.2993,-272.4089 735.2183,-276.1459"/>
+<!-- Node42&#45;&gt;Node22 -->
+<g id="edge76" class="edge">
+<title>Node42&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M608.1907,-302.4919C592.2894,-284.613 571.5591,-254.6945 589.0432,-235 703.5976,-105.9637 1190.1677,-173.2551 1360.0432,-143 1360.1447,-142.9819 1360.2463,-142.9638 1360.348,-142.9455"/>
+<polygon fill="#191970" stroke="#191970" points="1360.892,-146.4059 1370.0191,-141.0256 1359.5289,-139.5399 1360.892,-146.4059"/>
 </g>
-<!-- Node56&#45;&gt;Node54 -->
-<g id="edge123" class="edge">
-<title>Node56&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1341.0816,-772.3733C1358.7402,-763.4644 1385.2053,-750.1127 1405.2098,-740.0203"/>
-<polygon fill="#191970" stroke="#191970" points="1406.8158,-743.1303 1414.1674,-735.5011 1403.6628,-736.8806 1406.8158,-743.1303"/>
+<!-- Node42&#45;&gt;Node18 -->
+<g id="edge78" class="edge">
+<title>Node42&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M605.183,-302.253C595.563,-292.879 584.563,-280.004 579.0432,-266 547.2455,-185.326 558.2726,-78.5293 564.7313,-35.1091"/>
+<polygon fill="#191970" stroke="#191970" points="568.2169,-35.4745 566.3196,-25.051 561.3026,-34.3826 568.2169,-35.4745"/>
 </g>
-<!-- Node57&#45;&gt;Node16 -->
-<g id="edge133" class="edge">
-<title>Node57&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1405.6927,-837.1931C1309.8602,-834.8071 1069.1424,-825.6706 871,-792 774.4996,-775.6015 752.1311,-762.8433 658,-736 576.6132,-712.791 553.1845,-714.7135 476,-680 341.2891,-619.4141 240,-593.7081 240,-446 240,-446 240,-446 240,-133 240,-96.1996 259.1315,-87.0351 290,-67 320.9029,-46.9425 424.6127,-28.7039 478.2504,-20.3324"/>
-<polygon fill="#191970" stroke="#191970" points="478.9216,-23.7704 488.2739,-18.792 477.8583,-16.8516 478.9216,-23.7704"/>
+<!-- Node42&#45;&gt;Node24 -->
+<g id="edge79" class="edge">
+<title>Node42&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M686.2113,-303.1335C804.3104,-276.2737 1055.2056,-219.2117 1150.373,-197.5675"/>
+<polygon fill="#191970" stroke="#191970" points="1151.4293,-200.9167 1160.404,-195.2861 1149.8768,-194.091 1151.4293,-200.9167"/>
 </g>
-<!-- Node57&#45;&gt;Node18 -->
-<g id="edge134" class="edge">
-<title>Node57&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1490.1284,-837.5431C1712.3955,-835.0099 2740.8857,-821.7147 2801,-792 2863.8505,-760.9328 2901,-740.1096 2901,-670 2901,-670 2901,-670 2901,-614 2901,-399.722 2800.5293,-318.0496 2603,-235 2525.1284,-202.2595 2265.831,-192.3292 2172.6834,-189.7775"/>
-<polygon fill="#191970" stroke="#191970" points="2172.7626,-186.2785 2162.674,-189.5145 2172.5786,-193.2761 2172.7626,-186.2785"/>
+<!-- Node42&#45;&gt;Node31 -->
+<g id="edge80" class="edge">
+<title>Node42&#45;&gt;Node31</title>
+<path fill="none" stroke="#191970" d="M631.666,-302.2967C636.3302,-294.0729 642.198,-283.7272 647.4606,-274.4483"/>
+<polygon fill="#191970" stroke="#191970" points="650.5656,-276.0683 652.4546,-265.6432 644.4767,-272.6148 650.5656,-276.0683"/>
 </g>
-<!-- Node57&#45;&gt;Node14 -->
-<g id="edge131" class="edge">
-<title>Node57&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1490.0844,-837.8897C1707.8662,-837.1469 2701.8653,-831.5965 2833,-792 3089.4121,-714.5756 3277,-652.3464 3277,-384.5 3277,-384.5 3277,-384.5 3277,-133 3277,-101.7875 3276.7025,-87.308 3253,-67 3218.9329,-37.8118 3086.2767,-23.2664 3024.274,-17.9483"/>
-<polygon fill="#191970" stroke="#191970" points="3024.5105,-14.456 3014.2553,-17.1169 3023.9316,-21.432 3024.5105,-14.456"/>
+<!-- Node42&#45;&gt;Node36 -->
+<g id="edge77" class="edge">
+<title>Node42&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M582.5533,-302.4847C562.0044,-293.7085 537.3712,-281.3295 518.0432,-266 496.8053,-249.1557 477.9304,-224.0261 466.5027,-207.0998"/>
+<polygon fill="#191970" stroke="#191970" points="469.3662,-205.084 460.9525,-198.6451 463.5144,-208.9255 469.3662,-205.084"/>
 </g>
-<!-- Node57&#45;&gt;Node15 -->
-<g id="edge132" class="edge">
-<title>Node57&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1405.6997,-834.7512C1271.5755,-824.0167 857.3835,-787.4189 734,-736 695.9289,-720.1342 690.4306,-707.8409 660,-680 643.6483,-665.0398 392,-406.6627 392,-384.5 392,-384.5 392,-384.5 392,-250.5 392,-148.8045 493.203,-172.5688 582,-123 634.4474,-93.7225 647.4141,-84.081 705,-67 808.8202,-36.2051 936.0988,-23.2397 1002.2552,-18.2662"/>
-<polygon fill="#191970" stroke="#191970" points="1002.6145,-21.7494 1012.3351,-17.5351 1002.1081,-14.7678 1002.6145,-21.7494"/>
+<!-- Node43&#45;&gt;Node23 -->
+<g id="edge88" class="edge">
+<title>Node43&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M2459.2736,-307.918C2469.4864,-290.8945 2486.8665,-255.2668 2468.0432,-235 2448.1562,-213.5879 2019.4003,-195.5739 1885.5772,-190.4561"/>
+<polygon fill="#191970" stroke="#191970" points="1885.4933,-186.9505 1875.3678,-190.0689 1885.228,-193.9455 1885.4933,-186.9505"/>
 </g>
-<!-- Node57&#45;&gt;Node56 -->
-<g id="edge130" class="edge">
-<title>Node57&#45;&gt;Node56</title>
-<path fill="none" stroke="#191970" d="M1426.3398,-828.3733C1406.0228,-819.3435 1375.436,-805.7494 1352.625,-795.6111"/>
-<polygon fill="#191970" stroke="#191970" points="1353.9372,-792.3642 1343.3775,-791.5011 1351.0941,-798.7609 1353.9372,-792.3642"/>
+<!-- Node43&#45;&gt;Node20 -->
+<g id="edge91" class="edge">
+<title>Node43&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M2445.292,-307.8821C2430.0467,-289.7797 2394.1385,-251.028 2354.0432,-235 2244.0005,-191.0107 1943.4632,-219.2532 1808.8537,-198.585"/>
+<polygon fill="#191970" stroke="#191970" points="1809.1202,-195.0806 1798.6773,-196.8679 1807.9555,-201.983 1809.1202,-195.0806"/>
 </g>
-<!-- Node58&#45;&gt;Node3 -->
-<g id="edge163" class="edge">
-<title>Node58&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1699.2388,-953.2913C1951.41,-942.9473 2983.0067,-897.7016 3029,-848 3069.2568,-804.4974 3054.9399,-754.6457 3010,-716 2833.7074,-564.3987 2713.9542,-702.2375 2495,-624 2429.7955,-600.7009 2423.3224,-573.634 2359,-548 2319.769,-532.3655 2273.6022,-520.7322 2237.7137,-513.076"/>
-<polygon fill="#191970" stroke="#191970" points="2238.3825,-509.6402 2227.8782,-511.022 2236.9515,-516.4924 2238.3825,-509.6402"/>
+<!-- Node43&#45;&gt;Node25 -->
+<g id="edge86" class="edge">
+<title>Node43&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M2486.9979,-307.8766C2507.4317,-300.139 2531.7901,-287.0137 2544.0432,-266 2550.9834,-254.0978 2549.9022,-247.47 2544.0432,-235 2524.8826,-194.2189 2480.5229,-164.1267 2450.6526,-147.576"/>
+<polygon fill="#191970" stroke="#191970" points="2451.9818,-144.3176 2441.5139,-142.6819 2448.677,-150.4884 2451.9818,-144.3176"/>
 </g>
-<!-- Node58&#45;&gt;Node18 -->
-<g id="edge186" class="edge">
-<title>Node58&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1699.1041,-954.1926C1972.1468,-947.36 3167,-913.1663 3167,-838 3167,-838 3167,-838 3167,-670 3167,-387.3246 2974.2687,-314.49 2703,-235 2602.5102,-205.5534 2278.2829,-193.2508 2172.6271,-189.9589"/>
-<polygon fill="#191970" stroke="#191970" points="2172.6347,-186.4576 2162.5327,-189.6513 2172.4214,-193.4543 2172.6347,-186.4576"/>
+<!-- Node43&#45;&gt;Node10 -->
+<g id="edge87" class="edge">
+<title>Node43&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2479.2039,-307.9237C2581.7043,-270.4026 2956.1274,-133.3421 3074.1544,-90.1374"/>
+<polygon fill="#191970" stroke="#191970" points="3075.6204,-93.3279 3083.8079,-86.6037 3073.2141,-86.7545 3075.6204,-93.3279"/>
 </g>
-<!-- Node58&#45;&gt;Node14 -->
-<g id="edge184" class="edge">
-<title>Node58&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1699.4069,-954.6564C1992.9363,-949.8323 3353,-922.8989 3353,-838 3353,-838 3353,-838 3353,-133 3353,-95.6561 3333.1387,-86.0176 3301,-67 3254.4612,-39.4614 3093.7681,-23.6307 3024.3537,-17.9358"/>
-<polygon fill="#191970" stroke="#191970" points="3024.2759,-14.4185 3014.0283,-17.1086 3023.7169,-21.3961 3024.2759,-14.4185"/>
+<!-- Node43&#45;&gt;Node11 -->
+<g id="edge84" class="edge">
+<title>Node43&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M2493.6097,-307.9351C2502.6247,-305.8984 2512.1454,-303.816 2521.0432,-302 2715.9655,-262.2166 3231.6278,-245.6448 3402.0432,-143 3423.6297,-129.998 3463.9786,-72.7499 3486.2567,-39.5529"/>
+<polygon fill="#191970" stroke="#191970" points="3489.4,-41.1473 3492.0305,-30.8842 3483.574,-37.2669 3489.4,-41.1473"/>
 </g>
-<!-- Node58&#45;&gt;Node51 -->
-<g id="edge137" class="edge">
-<title>Node58&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1640.1067,-945.9356C1632.4131,-926.143 1616,-879.1022 1616,-838 1616,-838 1616,-838 1616,-782 1616,-749.7116 1616,-712.1872 1616,-689.8048"/>
-<polygon fill="#191970" stroke="#191970" points="1619.5001,-689.5883 1616,-679.5884 1612.5001,-689.5884 1619.5001,-689.5883"/>
+<!-- Node43&#45;&gt;Node16 -->
+<g id="edge89" class="edge">
+<title>Node43&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2462.6781,-307.8078C2477.6735,-291.5293 2503.1814,-258.2043 2486.0432,-235 2453.7206,-191.2365 2417.7841,-223.1729 2369.0432,-199 2278.8781,-154.2828 2277.5442,-106.6606 2185.0432,-67 2103.68,-32.1148 1998.5198,-20.7573 1946.3515,-17.1389"/>
+<polygon fill="#191970" stroke="#191970" points="1946.2548,-13.6256 1936.0513,-16.4786 1945.8069,-20.6113 1946.2548,-13.6256"/>
 </g>
-<!-- Node59 -->
-<g id="node40" class="node">
-<title>Node59</title>
-<g id="a_node40"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
-<polygon fill="#ffffff" stroke="#000000" points="1644.5,-772.5 1644.5,-791.5 1739.5,-791.5 1739.5,-772.5 1644.5,-772.5"/>
-<text text-anchor="middle" x="1692" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/module.h</text>
-</a>
+<!-- Node43&#45;&gt;Node33 -->
+<g id="edge85" class="edge">
+<title>Node43&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M2389.7311,-310.1432C2363.5461,-307.2897 2332.859,-304.189 2305.0432,-302 1968.4109,-275.5086 1565.3572,-259.0829 1402.5755,-253.0821"/>
+<polygon fill="#191970" stroke="#191970" points="1402.3256,-249.5707 1392.2041,-252.7019 1402.0691,-256.566 1402.3256,-249.5707"/>
 </g>
+<!-- Node43&#45;&gt;Node36 -->
+<g id="edge90" class="edge">
+<title>Node43&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M2389.7558,-309.8037C2363.5745,-306.8982 2332.8836,-303.8504 2305.0432,-302 1742.5017,-264.6112 1599.0245,-311.0326 1037.0432,-266 843.3482,-250.4789 615.3628,-215.3697 511.5895,-198.4566"/>
+<polygon fill="#191970" stroke="#191970" points="512.0757,-194.9897 501.6417,-196.8289 510.9453,-201.8978 512.0757,-194.9897"/>
 </g>
-<!-- Node58&#45;&gt;Node59 -->
-<g id="edge138" class="edge">
-<title>Node58&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M1646.6491,-945.9248C1654.2693,-918.3807 1676.3557,-838.5477 1686.6284,-801.416"/>
-<polygon fill="#191970" stroke="#191970" points="1690.0068,-802.3308 1689.3,-791.7595 1683.2602,-800.4642 1690.0068,-802.3308"/>
+<!-- Node43&#45;&gt;Node41 -->
+<g id="edge92" class="edge">
+<title>Node43&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M2457.9352,-327.055C2459.5588,-335.4031 2460.1184,-347.9072 2459.6138,-359.1199"/>
+<polygon fill="#191970" stroke="#191970" points="2456.1084,-359.0505 2458.8041,-369.2967 2463.0863,-359.6058 2456.1084,-359.0505"/>
 </g>
-<!-- Node65 -->
-<g id="node43" class="node">
-<title>Node65</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2749,-716.5 2749,-735.5 2835,-735.5 2835,-716.5 2749,-716.5"/>
-<text text-anchor="middle" x="2792" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_set</text>
+<!-- Node49&#45;&gt;Node4 -->
+<g id="edge118" class="edge">
+<title>Node49&#45;&gt;Node4</title>
+<path fill="none" stroke="#191970" d="M2276.8557,-615.3906C2270.0767,-606.3273 2259.7454,-592.5149 2251.4459,-581.4188"/>
+<polygon fill="#191970" stroke="#191970" points="2254.1244,-579.1563 2245.3321,-573.2449 2248.519,-583.349 2254.1244,-579.1563"/>
 </g>
-<!-- Node58&#45;&gt;Node65 -->
-<g id="edge185" class="edge">
-<title>Node58&#45;&gt;Node65</title>
-<path fill="none" stroke="#191970" d="M1691.8666,-945.9309C1876.9665,-908.927 2545.1631,-775.3459 2738.9667,-736.602"/>
-<polygon fill="#191970" stroke="#191970" points="2739.7887,-740.007 2748.9086,-734.6145 2738.4164,-733.1429 2739.7887,-740.007"/>
+<!-- Node49&#45;&gt;Node5 -->
+<g id="edge119" class="edge">
+<title>Node49&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M2287.027,-615.3459C2291.1274,-600.3426 2297.0351,-570.7866 2288.0432,-548 2283.4963,-536.4774 2274.5501,-526.143 2265.9898,-518.2024"/>
+<polygon fill="#191970" stroke="#191970" points="2268.2279,-515.5106 2258.3742,-511.6175 2263.6495,-520.8057 2268.2279,-515.5106"/>
 </g>
-<!-- Node66 -->
-<g id="node44" class="node">
-<title>Node66</title>
-<g id="a_node44"><a xlink:href="with_8h.html" target="_top" xlink:title="RAII wrapper function to enter and exit a context object similar to python&#39;s with syntax...">
-<polygon fill="#ffffff" stroke="#000000" points="332.5,-772.5 332.5,-791.5 441.5,-791.5 441.5,-772.5 332.5,-772.5"/>
-<text text-anchor="middle" x="387" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/support/with.h</text>
-</a>
+<!-- Node49&#45;&gt;Node21 -->
+<g id="edge120" class="edge">
+<title>Node49&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M2243.8645,-619.1565C2132.7128,-602.2425 1812.7142,-548.4695 1561.0432,-456 1481.3344,-426.7132 1398.345,-369.5894 1356.573,-338.6428"/>
+<polygon fill="#191970" stroke="#191970" points="1358.6457,-335.8224 1348.541,-332.6372 1354.4539,-341.4286 1358.6457,-335.8224"/>
 </g>
+<!-- Node49&#45;&gt;Node10 -->
+<g id="edge122" class="edge">
+<title>Node49&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2324.2738,-623.1666C2442.9867,-617.573 2790.4654,-599.8315 2902.0432,-579 3086.9661,-544.4751 3132.5457,-526.2784 3307.0432,-456 3357.9308,-435.5052 3377.3881,-437.9084 3417.0432,-400 3450.8113,-367.7194 3466.9608,-347.1696 3455.0432,-302 3446.3098,-268.899 3439.7666,-260.6047 3417.0432,-235 3345.7042,-154.615 3223.7022,-109.3264 3156.9969,-89.4037"/>
+<polygon fill="#191970" stroke="#191970" points="3157.7558,-85.9791 3147.1755,-86.5347 3155.793,-92.6983 3157.7558,-85.9791"/>
 </g>
-<!-- Node58&#45;&gt;Node66 -->
-<g id="edge164" class="edge">
-<title>Node58&#45;&gt;Node66</title>
-<path fill="none" stroke="#191970" d="M1597.3896,-945.9591C1584.9608,-943.7008 1571.5092,-941.5122 1559,-940 1314.8154,-910.4821 678.8791,-952.0304 456,-848 432.8955,-837.2158 412.646,-815.5648 400.1614,-799.9982"/>
-<polygon fill="#191970" stroke="#191970" points="402.6188,-797.4527 393.745,-791.6641 397.0721,-801.723 402.6188,-797.4527"/>
+<!-- Node49&#45;&gt;Node16 -->
+<g id="edge123" class="edge">
+<title>Node49&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2324.1524,-622.341C2494.5445,-609.4977 3142.093,-544.1997 2974.0432,-302 2850.5589,-124.0297 2110.9589,-36.2572 1946.4533,-18.7928"/>
+<polygon fill="#191970" stroke="#191970" points="1946.7728,-15.3072 1936.462,-17.7439 1946.0419,-22.2689 1946.7728,-15.3072"/>
 </g>
-<!-- Node67 -->
-<g id="node46" class="node">
-<title>Node67</title>
-<g id="a_node46"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="971,-884.5 971,-903.5 1107,-903.5 1107,-884.5 971,-884.5"/>
-<text text-anchor="middle" x="1039" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target_kind.h</text>
-</a>
+<!-- Node49&#45;&gt;Node28 -->
+<g id="edge121" class="edge">
+<title>Node49&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M2324.138,-624.1753C2526.9609,-619.9112 3431.0847,-599.7607 3451.0432,-579 3555.0986,-470.7622 3320.7221,-314.1801 3240.6404,-265.5074"/>
+<polygon fill="#191970" stroke="#191970" points="3242.0464,-262.2689 3231.6724,-260.1171 3238.4403,-268.2686 3242.0464,-262.2689"/>
 </g>
+<!-- Node50&#45;&gt;Node18 -->
+<g id="edge134" class="edge">
+<title>Node50&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1081.9464,-235.428C1021.1043,-201.9935 868.4369,-120.3596 734.0432,-67 688.6763,-48.9875 634.361,-33.1755 600.5211,-23.9752"/>
+<polygon fill="#191970" stroke="#191970" points="601.2747,-20.5536 590.7089,-21.3371 599.4572,-27.3136 601.2747,-20.5536"/>
 </g>
-<!-- Node58&#45;&gt;Node67 -->
-<g id="edge167" class="edge">
-<title>Node58&#45;&gt;Node67</title>
-<path fill="none" stroke="#191970" d="M1596.0239,-945.954C1583.9717,-943.776 1571.0408,-941.6269 1559,-940 1403.1364,-918.9405 1219.6311,-905.2944 1117.5852,-898.6884"/>
-<polygon fill="#191970" stroke="#191970" points="1117.5676,-895.1802 1107.364,-898.0327 1117.1194,-902.1658 1117.5676,-895.1802"/>
+<!-- Node50&#45;&gt;Node20 -->
+<g id="edge135" class="edge">
+<title>Node50&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1172.3992,-243.6428C1198.591,-240.8658 1229.2712,-237.6872 1257.0432,-235 1439.7717,-217.3191 1659.0239,-198.7047 1741.4977,-191.7944"/>
+<polygon fill="#191970" stroke="#191970" points="1741.8666,-195.2758 1751.5399,-190.9542 1741.2829,-188.3002 1741.8666,-195.2758"/>
 </g>
-<!-- Node59&#45;&gt;Node19 -->
-<g id="edge155" class="edge">
-<title>Node59&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M1656.9261,-772.3935C1613.1605,-758.0216 1544,-726.3894 1544,-670 1544,-670 1544,-670 1544,-614 1544,-512.91 1563.1822,-393.6161 1572.297,-342.6377"/>
-<polygon fill="#191970" stroke="#191970" points="1575.785,-343.0183 1574.1292,-332.5537 1568.8978,-341.7669 1575.785,-343.0183"/>
+<!-- Node50&#45;&gt;Node24 -->
+<g id="edge136" class="edge">
+<title>Node50&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M1128.5713,-235.2977C1140.337,-226.1384 1155.3107,-214.4816 1167.2897,-205.1562"/>
+<polygon fill="#191970" stroke="#191970" points="1169.6535,-207.7516 1175.3943,-198.8469 1165.3535,-202.228 1169.6535,-207.7516"/>
 </g>
-<!-- Node59&#45;&gt;Node16 -->
-<g id="edge161" class="edge">
-<title>Node59&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1644.2551,-779.6455C1522.1869,-773.4126 1200.947,-755.6111 1096,-736 926.2737,-704.2839 875.378,-707.0521 724,-624 590.277,-550.6341 567.6319,-511.9143 464,-400 407.9518,-339.4724 354,-332.9923 354,-250.5 354,-250.5 354,-250.5 354,-133 354,-71.4381 432.3309,-38.2206 478.3354,-24.0203"/>
-<polygon fill="#191970" stroke="#191970" points="479.5688,-27.3057 488.1709,-21.1207 477.5892,-20.5914 479.5688,-27.3057"/>
+<!-- Node51&#45;&gt;Node3 -->
+<g id="edge154" class="edge">
+<title>Node51&#45;&gt;Node3</title>
+<path fill="none" stroke="#191970" d="M1058.1437,-792.7004C1212.7589,-791.3765 1688.1335,-784.2982 1837.0432,-747 1857.4888,-741.8789 1859.859,-733.0699 1880.0432,-727 1983.3338,-695.938 2013.6659,-708.8227 2120.0432,-691 2121.3391,-690.7829 2122.652,-690.5602 2123.9763,-690.3331"/>
+<polygon fill="#191970" stroke="#191970" points="2125.015,-693.7043 2134.2587,-688.527 2123.804,-686.8098 2125.015,-693.7043"/>
 </g>
-<!-- Node59&#45;&gt;Node18 -->
-<g id="edge162" class="edge">
-<title>Node59&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1739.5236,-777.1728C1899.0216,-759.8729 2410.3628,-695.1399 2502,-568 2535.0846,-522.0975 2706.1189,-696.0654 2460,-369 2454.9458,-362.2835 2241.1745,-244.914 2165.3403,-203.4023"/>
-<polygon fill="#191970" stroke="#191970" points="2166.8468,-200.2369 2156.3942,-198.5068 2163.4865,-206.3777 2166.8468,-200.2369"/>
+<!-- Node51&#45;&gt;Node21 -->
+<g id="edge155" class="edge">
+<title>Node51&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1013.7613,-783.3731C1034.9741,-740.5287 1124.8184,-563.6825 1224.0432,-436 1251.1682,-401.0956 1285.6504,-363.2804 1307.5415,-339.9864"/>
+<polygon fill="#191970" stroke="#191970" points="1310.1315,-342.3415 1314.454,-332.6685 1305.0428,-337.5347 1310.1315,-342.3415"/>
 </g>
-<!-- Node59&#45;&gt;Node14 -->
+<!-- Node51&#45;&gt;Node16 -->
 <g id="edge158" class="edge">
-<title>Node59&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1739.5095,-780.21C1886.9722,-774.4988 2335.592,-755.9142 2480,-736 2508.7903,-732.0298 2964.9785,-636.9432 2991,-624 3015.9346,-611.5975 3162.441,-479.7401 3177,-456 3194.524,-427.4253 3201,-418.0202 3201,-384.5 3201,-384.5 3201,-384.5 3201,-133 3201,-101.7875 3199.9066,-88.2017 3177,-67 3154.7648,-46.4197 3070.8503,-29.103 3024.0461,-20.7896"/>
-<polygon fill="#191970" stroke="#191970" points="3024.5582,-17.3262 3014.1063,-19.0604 3023.3584,-24.2226 3024.5582,-17.3262"/>
-</g>
-<!-- Node59&#45;&gt;Node31 -->
-<g id="edge157" class="edge">
-<title>Node59&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1644.4458,-780.3728C1567.5344,-776.8741 1420.281,-766.3153 1379,-736 1285.9129,-667.6402 1266,-617.4914 1266,-502 1266,-502 1266,-502 1266,-446 1266,-374.1508 1325.3718,-306.2628 1360.2415,-272.5873"/>
-<polygon fill="#191970" stroke="#191970" points="1362.8293,-274.9575 1367.6964,-265.5467 1358.023,-269.8683 1362.8293,-274.9575"/>
+<title>Node51&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M972.0692,-783.4511C951.5778,-776.4348 927.0778,-764.9348 911.0432,-747 876.7717,-708.6671 886.5805,-686.2188 882.0432,-635 858.9839,-374.6921 801.1458,-185.5357 1034.0432,-67 1110.7192,-27.9749 1731.6988,-17.7723 1881.5769,-15.8681"/>
+<polygon fill="#191970" stroke="#191970" points="1881.972,-19.3636 1891.928,-15.7405 1881.8856,-12.3642 1881.972,-19.3636"/>
 </g>
-<!-- Node59&#45;&gt;Node34 -->
+<!-- Node51&#45;&gt;Node17 -->
 <g id="edge159" class="edge">
-<title>Node59&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M1644.468,-779.7697C1518.9127,-773.636 1184.881,-755.6592 1139,-736 869.615,-620.5732 674.0051,-289.2793 628.8959,-207.4449"/>
-<polygon fill="#191970" stroke="#191970" points="631.9017,-205.6461 624.0425,-198.5409 625.7554,-208.9964 631.9017,-205.6461"/>
-</g>
-<!-- Node59&#45;&gt;Node48 -->
-<g id="edge148" class="edge">
-<title>Node59&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1644.4012,-776.2179C1559.5872,-765.9152 1382.5656,-744.4114 1290.1064,-733.18"/>
-<polygon fill="#191970" stroke="#191970" points="1290.4232,-729.6928 1280.074,-731.9613 1289.579,-736.6417 1290.4232,-729.6928"/>
-</g>
-<!-- Node59&#45;&gt;Node51 -->
-<g id="edge147" class="edge">
-<title>Node59&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1685.5203,-772.4509C1672.8444,-753.7707 1644.584,-712.1238 1628.266,-688.0761"/>
-<polygon fill="#191970" stroke="#191970" points="1630.9745,-685.8344 1622.4633,-679.5249 1625.1822,-689.7649 1630.9745,-685.8344"/>
+<title>Node51&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1058.306,-791.4185C1361.3799,-781.4491 2976.0433,-724.8209 3462.0432,-635 3648.0235,-600.6277 3806.0432,-573.6299 3806.0432,-384.5 3806.0432,-384.5 3806.0432,-384.5 3806.0432,-250.5 3806.0432,-168.6145 3750.0866,-150.7408 3673.0432,-123 3405.151,-26.5408 3320.8367,-71.7809 3039.0432,-31 3018.5725,-28.0375 2995.8993,-24.7535 2976.7127,-21.9736"/>
+<polygon fill="#191970" stroke="#191970" points="2977.1373,-18.4987 2966.7387,-20.5284 2976.1334,-25.4263 2977.1373,-18.4987"/>
 </g>
-<!-- Node59&#45;&gt;Node53 -->
-<g id="edge149" class="edge">
-<title>Node59&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1727.1611,-772.4925C1792.8442,-754.6849 1938.6103,-714.9436 2061,-680 2124.7876,-661.7879 2198.544,-639.894 2243.467,-626.4649"/>
-<polygon fill="#191970" stroke="#191970" points="2244.5708,-629.7881 2253.148,-623.5685 2242.5644,-623.0818 2244.5708,-629.7881"/>
+<!-- Node51&#45;&gt;Node33 -->
+<g id="edge157" class="edge">
+<title>Node51&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M987.2557,-783.466C954.4192,-767.4478 896.0432,-731.8238 896.0432,-681 896.0432,-681 896.0432,-681 896.0432,-563.5 896.0432,-388.0696 1132.3741,-301.0138 1255.8419,-267.5032"/>
+<polygon fill="#191970" stroke="#191970" points="1256.8563,-270.8551 1265.6176,-264.8978 1255.0535,-264.0912 1256.8563,-270.8551"/>
 </g>
-<!-- Node59&#45;&gt;Node40 -->
+<!-- Node51&#45;&gt;Node42 -->
 <g id="edge156" class="edge">
-<title>Node59&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M1644.2147,-779.7963C1523.7758,-773.9559 1214.4195,-757.074 1173,-736 1063.5549,-680.315 830.1517,-363.2995 765.757,-273.9429"/>
-<polygon fill="#191970" stroke="#191970" points="768.4111,-271.6387 759.7323,-265.5618 762.7273,-275.7245 768.4111,-271.6387"/>
+<title>Node51&#45;&gt;Node42</title>
+<path fill="none" stroke="#191970" d="M959.9445,-785.9921C925.7257,-779.5583 879.9573,-767.7646 844.0432,-747 813.2983,-729.2241 808.6664,-719.1722 787.0432,-691 713.9724,-595.798 697.1731,-567.9937 654.0432,-456 639.4246,-418.0403 630.7686,-371.4428 626.4643,-343.1337"/>
+<polygon fill="#191970" stroke="#191970" points="629.8776,-342.2829 624.9765,-332.8899 622.9503,-343.2891 629.8776,-342.2829"/>
 </g>
-<!-- Node60 -->
+<!-- Node52 -->
 <g id="node41" class="node">
-<title>Node60</title>
-<g id="a_node41"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1773,-716.5 1773,-735.5 1847,-735.5 1847,-716.5 1773,-716.5"/>
-<text text-anchor="middle" x="1810" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/adt.h</text>
+<title>Node52</title>
+<g id="a_node41"><a xlink:href="ir_2attrs_8h.html" target="_top" xlink:title="Helpers for attribute objects. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1748.0432,-727.5 1748.0432,-746.5 1828.0432,-746.5 1828.0432,-727.5 1748.0432,-727.5"/>
+<text text-anchor="middle" x="1788.0432" y="-734.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/attrs.h</text>
 </a>
 </g>
 </g>
-<!-- Node59&#45;&gt;Node60 -->
-<g id="edge139" class="edge">
-<title>Node59&#45;&gt;Node60</title>
-<path fill="none" stroke="#191970" d="M1712.2849,-772.3733C1731.1421,-763.4241 1759.446,-749.9917 1780.7455,-739.8835"/>
-<polygon fill="#191970" stroke="#191970" points="1782.4461,-742.9506 1789.9798,-735.5011 1779.4449,-736.6266 1782.4461,-742.9506"/>
+<!-- Node51&#45;&gt;Node52 -->
+<g id="edge143" class="edge">
+<title>Node51&#45;&gt;Node52</title>
+<path fill="none" stroke="#191970" d="M1058.0683,-789.4757C1197.761,-779.4336 1596.8429,-750.7448 1737.9042,-740.6043"/>
+<polygon fill="#191970" stroke="#191970" points="1738.2783,-744.0866 1748.0016,-739.8785 1737.7763,-737.1046 1738.2783,-744.0866"/>
 </g>
-<!-- Node62 -->
-<g id="node42" class="node">
-<title>Node62</title>
-<g id="a_node42"><a xlink:href="source__map_8h.html" target="_top" xlink:title="A map from source names to source code. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2503.5,-604.5 2503.5,-623.5 2644.5,-623.5 2644.5,-604.5 2503.5,-604.5"/>
-<text text-anchor="middle" x="2574" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/parser/source_map.h</text>
-</a>
+<!-- Node52&#45;&gt;Node3 -->
+<g id="edge144" class="edge">
+<title>Node52&#45;&gt;Node3</title>
+<path fill="none" stroke="#191970" d="M1828.1062,-729.3306C1832.7982,-728.5066 1837.5223,-727.7108 1842.0432,-727 1965.1186,-707.6499 1996.9679,-710.3501 2120.0432,-691 2121.3413,-690.7959 2122.656,-690.5848 2123.9819,-690.3681"/>
+<polygon fill="#191970" stroke="#191970" points="2125.0019,-693.745 2134.2735,-688.6179 2123.8282,-686.8441 2125.0019,-693.745"/>
+</g>
+<!-- Node52&#45;&gt;Node7 -->
+<g id="edge145" class="edge">
+<title>Node52&#45;&gt;Node7</title>
+<path fill="none" stroke="#191970" d="M1815.8625,-727.4168C1833.6142,-720.0015 1855.8711,-708.087 1870.0432,-691 1946.3994,-598.9396 1875.0583,-524.0161 1956.0432,-436 1970.2557,-420.5536 1989.9018,-409.5271 2009.1379,-401.75"/>
+<polygon fill="#191970" stroke="#191970" points="2010.4153,-405.0089 2018.5225,-398.188 2007.9312,-398.4644 2010.4153,-405.0089"/>
+</g>
+<!-- Node52&#45;&gt;Node18 -->
+<g id="edge152" class="edge">
+<title>Node52&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1747.818,-734.3658C1663.338,-728.5911 1461.3551,-713.5106 1293.0432,-691 1025.8965,-655.2709 953.6339,-661.4936 697.0432,-579 569.9642,-538.1442 542.0062,-516.5482 423.0432,-456 249.6208,-367.7338 76.0432,-383.5926 76.0432,-189 76.0432,-189 76.0432,-189 76.0432,-133 76.0432,-38.3439 424.9168,-19.7847 535.1075,-16.2836"/>
+<polygon fill="#191970" stroke="#191970" points="535.4079,-19.7763 545.2998,-15.9817 535.2005,-12.7794 535.4079,-19.7763"/>
+</g>
+<!-- Node52&#45;&gt;Node20 -->
+<g id="edge153" class="edge">
+<title>Node52&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1786.897,-727.4281C1784.685,-708.0998 1780.0432,-663.0133 1780.0432,-625 1780.0432,-625 1780.0432,-625 1780.0432,-563.5 1780.0432,-427.8239 1776.7215,-264.5235 1775.493,-208.7655"/>
+<polygon fill="#191970" stroke="#191970" points="1778.9873,-208.4675 1775.2645,-198.5483 1771.989,-208.6241 1778.9873,-208.4675"/>
 </g>
+<!-- Node52&#45;&gt;Node16 -->
+<g id="edge149" class="edge">
+<title>Node52&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1747.8791,-735.1993C1634.8959,-727.9331 1311.2932,-691.653 1138.0432,-512 1029.5532,-399.5003 995.7856,-332.5261 1025.0432,-179 1036.3515,-119.6611 1046.9945,-91.8722 1102.0432,-67 1173.9461,-34.5127 1738.8627,-19.5043 1881.4472,-16.2112"/>
+<polygon fill="#191970" stroke="#191970" points="1881.7972,-19.7042 1891.7149,-15.9775 1881.6379,-12.706 1881.7972,-19.7042"/>
 </g>
-<!-- Node59&#45;&gt;Node62 -->
+<!-- Node52&#45;&gt;Node17 -->
 <g id="edge150" class="edge">
-<title>Node59&#45;&gt;Node62</title>
-<path fill="none" stroke="#191970" d="M1739.7302,-781.0576C1896.0465,-777.7028 2386.3122,-764.9595 2451,-736 2501.5601,-713.3652 2543.0352,-660.1511 2562.3273,-632.0745"/>
-<polygon fill="#191970" stroke="#191970" points="2565.3476,-633.8543 2568.0005,-623.597 2559.5301,-629.9612 2565.3476,-633.8543"/>
+<title>Node52&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1828.3767,-735.3539C2063.3456,-725.6458 3250.6368,-674.9264 3408.0432,-635 3596.9832,-587.0751 3768.0432,-579.4233 3768.0432,-384.5 3768.0432,-384.5 3768.0432,-384.5 3768.0432,-250.5 3768.0432,-166.9962 3710.0788,-146.816 3630.0432,-123 3503.8683,-85.4544 3466.2369,-106.4777 3336.0432,-87 3205.0147,-67.3974 3051.1164,-38.5351 2976.7513,-24.2169"/>
+<polygon fill="#191970" stroke="#191970" points="2977.2787,-20.7542 2966.7966,-22.2956 2975.952,-27.6274 2977.2787,-20.7542"/>
 </g>
-<!-- Node59&#45;&gt;Node65 -->
-<g id="edge160" class="edge">
-<title>Node59&#45;&gt;Node65</title>
-<path fill="none" stroke="#191970" d="M1739.6084,-780.7215C1916.8061,-775.835 2538.0416,-757.5286 2735,-736 2736.264,-735.8618 2737.5424,-735.7129 2738.8306,-735.5546"/>
-<polygon fill="#191970" stroke="#191970" points="2739.3941,-739.0101 2748.8241,-734.1805 2738.4405,-732.0754 2739.3941,-739.0101"/>
+<!-- Node52&#45;&gt;Node29 -->
+<g id="edge146" class="edge">
+<title>Node52&#45;&gt;Node29</title>
+<path fill="none" stroke="#191970" d="M1818.0355,-727.4419C1839.4146,-719.721 1868.132,-707.4357 1890.0432,-691 2009.3204,-601.5301 1985.9604,-521.6019 2108.0432,-436 2129.1631,-421.1912 2155.0755,-410.0289 2178.6586,-401.9498"/>
+<polygon fill="#191970" stroke="#191970" points="2179.8547,-405.2408 2188.2551,-398.7845 2177.662,-398.5931 2179.8547,-405.2408"/>
 </g>
-<!-- Node60&#45;&gt;Node3 -->
-<g id="edge142" class="edge">
-<title>Node60&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1825.8125,-716.375C1883.6121,-681.1927 2083.6991,-559.4006 2153.6378,-516.8292"/>
-<polygon fill="#191970" stroke="#191970" points="2155.5242,-519.7784 2162.2464,-511.5891 2151.8846,-513.799 2155.5242,-519.7784"/>
+<!-- Node52&#45;&gt;Node36 -->
+<g id="edge151" class="edge">
+<title>Node52&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M1747.7428,-732.4336C1565.3753,-711.3002 817.3085,-619.4387 610.0432,-512 537.4358,-474.3629 508.0612,-468.9261 464.0432,-400 423.9258,-337.1816 418.2045,-307.8333 434.0432,-235 436.0857,-225.6079 440.2221,-215.8357 444.2879,-207.7264"/>
+<polygon fill="#191970" stroke="#191970" points="447.4341,-209.2651 449.0667,-198.7968 441.2623,-205.9622 447.4341,-209.2651"/>
 </g>
-<!-- Node60&#45;&gt;Node19 -->
-<g id="edge143" class="edge">
-<title>Node60&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M1805.4141,-716.1968C1792.64,-689.0582 1755.269,-610.8011 1720,-548 1676.5874,-470.6982 1619.6901,-382.435 1592.6193,-341.1378"/>
-<polygon fill="#191970" stroke="#191970" points="1595.4246,-339.0336 1587.0077,-332.5989 1589.5748,-342.878 1595.4246,-339.0336"/>
+<!-- Node52&#45;&gt;Node40 -->
+<g id="edge148" class="edge">
+<title>Node52&#45;&gt;Node40</title>
+<path fill="none" stroke="#191970" d="M1791.2988,-727.4385C1804.8463,-688.5011 1859.6801,-539.0564 1937.0432,-436 1963.4816,-400.7811 1976.2944,-397.2908 2010.0432,-369 2056.115,-330.3793 2112.8046,-288.3527 2143.2458,-266.2055"/>
+<polygon fill="#191970" stroke="#191970" points="2145.417,-268.9544 2151.4578,-260.2504 2141.3076,-263.2876 2145.417,-268.9544"/>
 </g>
-<!-- Node60&#45;&gt;Node8 -->
-<g id="edge145" class="edge">
-<title>Node60&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1812.4049,-716.3385C1825.3901,-664.0133 1888.133,-409.1744 1931,-199 1937.8085,-165.6183 1943.57,-156.9718 1941,-123 1940.3464,-114.361 1939.0119,-104.8868 1937.6817,-96.7935"/>
-<polygon fill="#191970" stroke="#191970" points="1941.1078,-96.0674 1935.9347,-86.8213 1934.2128,-97.2753 1941.1078,-96.0674"/>
+<!-- Node52&#45;&gt;Node41 -->
+<g id="edge147" class="edge">
+<title>Node52&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M1822.3137,-727.4911C1848.5297,-719.5365 1885.1379,-706.9346 1915.0432,-691 2047.2368,-620.5627 2056.1593,-564.8419 2187.0432,-492 2255.815,-453.7259 2341.0309,-421.7447 2396.1903,-402.8696"/>
+<polygon fill="#191970" stroke="#191970" points="2397.4451,-406.14 2405.7907,-399.613 2395.1964,-399.511 2397.4451,-406.14"/>
 </g>
-<!-- Node60&#45;&gt;Node14 -->
-<g id="edge146" class="edge">
-<title>Node60&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1847.1125,-723.3204C1933.2695,-716.9775 2153.493,-700.0607 2337,-680 2649.898,-645.7945 2751.765,-668.262 3025,-512 3087.5155,-476.2476 3125,-456.5168 3125,-384.5 3125,-384.5 3125,-384.5 3125,-133 3125,-101.7875 3122.0252,-90.0686 3101,-67 3080.522,-44.5318 3048.1921,-30.9807 3024.0518,-23.4609"/>
-<polygon fill="#191970" stroke="#191970" points="3024.8462,-20.0467 3014.2658,-20.599 3022.8813,-26.7653 3024.8462,-20.0467"/>
+<!-- Node54&#45;&gt;Node4 -->
+<g id="edge162" class="edge">
+<title>Node54&#45;&gt;Node4</title>
+<path fill="none" stroke="#191970" d="M1713.6341,-617.7036C1835.1815,-605.1403 2081.1431,-579.7174 2187.3346,-568.7413"/>
+<polygon fill="#191970" stroke="#191970" points="2187.8812,-572.2036 2197.4683,-567.6939 2187.1614,-565.2406 2187.8812,-572.2036"/>
 </g>
-<!-- Node60&#45;&gt;Node31 -->
-<g id="edge144" class="edge">
-<title>Node60&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1801.1233,-716.0685C1748.9277,-657.6709 1481.6536,-358.6384 1405.3582,-273.2773"/>
-<polygon fill="#191970" stroke="#191970" points="1407.8583,-270.8223 1398.5846,-265.6988 1402.6391,-275.4872 1407.8583,-270.8223"/>
+<!-- Node54&#45;&gt;Node18 -->
+<g id="edge171" class="edge">
+<title>Node54&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1572.1323,-619.891C1275.8231,-596.639 152.0432,-488.698 152.0432,-189 152.0432,-189 152.0432,-189 152.0432,-133 152.0432,-86.5625 190.6062,-85.8558 233.0432,-67 286.9872,-43.0314 462.1407,-24.9999 535.3812,-18.3214"/>
+<polygon fill="#191970" stroke="#191970" points="535.7746,-21.8003 545.4211,-17.4191 535.148,-14.8284 535.7746,-21.8003"/>
 </g>
-<!-- Node60&#45;&gt;Node51 -->
-<g id="edge140" class="edge">
-<title>Node60&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1776.6502,-716.3733C1743.972,-706.9404 1694.0382,-692.5265 1658.523,-682.2747"/>
-<polygon fill="#191970" stroke="#191970" points="1659.493,-678.9119 1648.9146,-679.5011 1657.5516,-685.6373 1659.493,-678.9119"/>
+<!-- Node54&#45;&gt;Node20 -->
+<g id="edge172" class="edge">
+<title>Node54&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1647.7497,-615.4036C1659.8836,-590.0954 1692.1116,-519.1402 1704.0432,-456 1722.2883,-359.4503 1663.7106,-321.6487 1710.0432,-235 1717.2823,-221.462 1730.4716,-211.0134 1743.0869,-203.5215"/>
+<polygon fill="#191970" stroke="#191970" points="1744.9956,-206.4664 1752.0781,-198.5868 1741.6276,-200.3299 1744.9956,-206.4664"/>
 </g>
-<!-- Node60&#45;&gt;Node53 -->
-<g id="edge141" class="edge">
-<title>Node60&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1847.2725,-717.2115C1931.9953,-697.2348 2139.0952,-648.4028 2234.8881,-625.8159"/>
-<polygon fill="#191970" stroke="#191970" points="2235.9749,-629.1557 2244.9048,-623.454 2234.3684,-622.3425 2235.9749,-629.1557"/>
+<!-- Node54&#45;&gt;Node16 -->
+<g id="edge170" class="edge">
+<title>Node54&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1624.443,-615.3966C1526.2752,-564.5315 1071.5928,-326.6246 1037.0432,-266 1016.6776,-230.2642 1025.2899,-170.541 1063.0432,-123 1098.1484,-78.7937 1122.5373,-81.6858 1177.0432,-67 1312.8901,-30.3981 1757.1337,-18.7085 1881.4528,-16.1131"/>
+<polygon fill="#191970" stroke="#191970" points="1881.8908,-19.605 1891.8176,-15.9025 1881.7485,-12.6065 1881.8908,-19.605"/>
 </g>
-<!-- Node62&#45;&gt;Node16 -->
-<g id="edge153" class="edge">
-<title>Node62&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2503.2255,-608.9859C2285.2553,-592.4873 1605.8298,-533.1521 1059,-400 827.8951,-343.7264 708.4202,-387.2264 563,-199 524.4567,-149.111 514.482,-71.93 511.9009,-35.6479"/>
-<polygon fill="#191970" stroke="#191970" points="515.3788,-35.168 511.2959,-25.3915 508.3909,-35.5803 515.3788,-35.168"/>
+<!-- Node54&#45;&gt;Node41 -->
+<g id="edge163" class="edge">
+<title>Node54&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M1661.9603,-615.3824C1724.0847,-584.223 1928.1714,-485.2234 2108.0432,-436 2158.7708,-422.118 2299.4767,-403.3663 2384.7136,-392.7431"/>
+<polygon fill="#191970" stroke="#191970" points="2385.3306,-396.1935 2394.8236,-391.4887 2384.4687,-389.2467 2385.3306,-396.1935"/>
 </g>
-<!-- Node62&#45;&gt;Node18 -->
-<g id="edge154" class="edge">
-<title>Node62&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2581.4545,-604.0934C2603.8499,-573.3145 2669,-475.9196 2669,-384.5 2669,-384.5 2669,-384.5 2669,-317.5 2669,-215.0133 2289.9666,-193.9687 2172.974,-189.9141"/>
-<polygon fill="#191970" stroke="#191970" points="2172.9115,-186.4102 2162.8026,-189.582 2172.6831,-193.4065 2172.9115,-186.4102"/>
+<!-- Node55 -->
+<g id="node43" class="node">
+<title>Node55</title>
+<g id="a_node43"><a xlink:href="registry_8h.html" target="_top" xlink:title="This file defines the TVM global function registry. ">
+<polygon fill="#ffffff" stroke="#000000" points="1570.5432,-436.5 1570.5432,-455.5 1695.5432,-455.5 1695.5432,-436.5 1570.5432,-436.5"/>
+<text text-anchor="middle" x="1633.0432" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/registry.h</text>
+</a>
 </g>
-<!-- Node62&#45;&gt;Node14 -->
-<g id="edge152" class="edge">
-<title>Node62&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2585.1721,-604.3733C2595.6611,-595.2307 2611.6871,-580.9909 2625,-568 2658.3011,-535.5043 2658.0545,-517.4595 2697,-492 2736.538,-466.1532 2755.328,-478.243 2797,-456 2834.0143,-436.2431 2846.3195,-432.3813 2873,-400 2938.9915,-319.9082 2973,-292.7766 2973,-189 2973,-189 2973,-189 2973,-133 2973,-98.1936 2981.2253,-58.3226 2986.8521,-35.1197"/>
-<polygon fill="#191970" stroke="#191970" points="2990.3113,-35.7152 2989.3581,-25.1633 2983.523,-34.0065 2990.3113,-35.7152"/>
 </g>
-<!-- Node62&#45;&gt;Node52 -->
-<g id="edge151" class="edge">
-<title>Node62&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M2553.0275,-604.3733C2533.4433,-595.3838 2504.0039,-581.8706 2481.949,-571.7471"/>
-<polygon fill="#191970" stroke="#191970" points="2483.2473,-568.4919 2472.6989,-567.5011 2480.3271,-574.8538 2483.2473,-568.4919"/>
+<!-- Node54&#45;&gt;Node55 -->
+<g id="edge164" class="edge">
+<title>Node54&#45;&gt;Node55</title>
+<path fill="none" stroke="#191970" d="M1642.5056,-615.3762C1640.9321,-587.2097 1636.3121,-504.5126 1634.1645,-466.0713"/>
+<polygon fill="#191970" stroke="#191970" points="1637.636,-465.4606 1633.5835,-455.6714 1630.6469,-465.8511 1637.636,-465.4606"/>
+</g>
+<!-- Node55&#45;&gt;Node18 -->
+<g id="edge168" class="edge">
+<title>Node55&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1570.2808,-442.1665C1349.8189,-428.3502 619.088,-379.4651 525.0432,-333 445.7581,-293.8272 364.5264,-260.4201 399.0432,-179 428.3985,-109.7555 503.0272,-55.4986 542.5643,-30.5191"/>
+<polygon fill="#191970" stroke="#191970" points="544.6907,-33.3194 551.3552,-25.0832 541.0091,-27.3657 544.6907,-33.3194"/>
+</g>
+<!-- Node55&#45;&gt;Node20 -->
+<g id="edge169" class="edge">
+<title>Node55&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1631.9123,-436.3402C1628.6689,-403.9863 1622.5097,-297.9387 1672.0432,-235 1689.1352,-213.2825 1718.6817,-201.5303 1741.7582,-195.356"/>
+<polygon fill="#191970" stroke="#191970" points="1742.6356,-198.7447 1751.5166,-192.9672 1740.9712,-191.9454 1742.6356,-198.7447"/>
 </g>
-<!-- Node66&#45;&gt;Node16 -->
+<!-- Node55&#45;&gt;Node16 -->
 <g id="edge166" class="edge">
-<title>Node66&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M371.3882,-772.4562C357.8598,-763.8078 338.1352,-750.2658 323,-736 216.0358,-635.1794 164,-592.9902 164,-446 164,-446 164,-446 164,-133 164,-101.7875 163.9288,-86.8696 188,-67 231.9311,-30.737 405.3957,-19.6819 478.374,-16.5962"/>
-<polygon fill="#191970" stroke="#191970" points="478.5323,-20.0927 488.3848,-16.1967 478.2532,-13.0983 478.5323,-20.0927"/>
+<title>Node55&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1620.7311,-436.395C1587.3269,-410.7874 1491.4177,-340.2349 1401.0432,-302 1340.2872,-276.2958 1318.4624,-290.0767 1257.0432,-266 1199.4495,-243.4229 1185.1532,-235.2734 1135.0432,-199 1100.1827,-173.7653 1046.612,-155.3065 1075.0432,-123 1129.5249,-61.0923 1734.3661,-25.0598 1881.7876,-17.1583"/>
+<polygon fill="#191970" stroke="#191970" points="1882.1795,-20.6426 1891.9801,-16.618 1881.8088,-13.6524 1882.1795,-20.6426"/>
 </g>
-<!-- Node50 -->
-<g id="node45" class="node">
-<title>Node50</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="332.5,-716.5 332.5,-735.5 427.5,-735.5 427.5,-716.5 332.5,-716.5"/>
-<text text-anchor="middle" x="380" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/common.h</text>
+<!-- Node55&#45;&gt;Node17 -->
+<g id="edge167" class="edge">
+<title>Node55&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1653.3613,-436.3363C1702.2986,-413.076 1830.6085,-352.1817 1938.0432,-302 2002.3642,-271.9563 2018.1166,-263.7113 2083.0432,-235 2199.9844,-183.2873 2225.8638,-160.7042 2348.0432,-123 2468.4765,-85.8347 2502.1048,-89.9064 2626.0432,-67 2718.4959,-49.9128 2827.1071,-32.1886 2886.8782,-22.6372"/>
+<polygon fill="#191970" stroke="#191970" points="2887.778,-26.038 2897.1022,-21.007 2886.6758,-19.1253 2887.778,-26.038"/>
 </g>
-<!-- Node66&#45;&gt;Node50 -->
+<!-- Node55&#45;&gt;Node41 -->
 <g id="edge165" class="edge">
-<title>Node66&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M385.7807,-772.2455C384.8673,-764.9382 383.5868,-754.6944 382.4631,-745.7046"/>
-<polygon fill="#191970" stroke="#191970" points="385.9187,-745.1313 381.2053,-735.6427 378.9728,-745.9996 385.9187,-745.1313"/>
+<title>Node55&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M1695.7377,-442.6073C1815.45,-435.9381 2084.8173,-420.0127 2311.0432,-400 2335.0363,-397.8775 2361.2636,-395.1344 2384.5762,-392.5471"/>
+<polygon fill="#191970" stroke="#191970" points="2385.1692,-396.0027 2394.7175,-391.4113 2384.39,-389.0462 2385.1692,-396.0027"/>
 </g>
-<!-- Node67&#45;&gt;Node3 -->
-<g id="edge179" class="edge">
-<title>Node67&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1084.0197,-884.4967C1151.6872,-870.4864 1283.7609,-844.2021 1397,-828 1607.9517,-797.8173 2205.7874,-794.2149 2334,-624 2371.252,-574.5443 2278.9001,-534.2975 2220.8493,-514.776"/>
-<polygon fill="#191970" stroke="#191970" points="2221.7432,-511.3859 2211.1505,-511.5983 2219.5637,-518.038 2221.7432,-511.3859"/>
-</g>
-<!-- Node67&#45;&gt;Node21 -->
-<g id="edge180" class="edge">
-<title>Node67&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1107.2073,-890.5901C1429.2031,-874.1718 2784.098,-801.3746 2844,-736 3003.4873,-561.9421 2667.0942,-482.3329 2460,-369 2244.4947,-251.0642 1946.294,-206.135 1840.3079,-193.2801"/>
-<polygon fill="#191970" stroke="#191970" points="1840.4219,-189.7692 1830.0794,-192.0675 1839.5978,-196.7205 1840.4219,-189.7692"/>
+<!-- Node58&#45;&gt;Node1 -->
+<g id="edge183" class="edge">
+<title>Node58&#45;&gt;Node1</title>
+<path fill="none" stroke="#191970" d="M2046.0253,-954.7978C1944.1479,-947.0374 1768.359,-932.6042 1618.0432,-915 1474.5144,-898.1906 1306.4726,-872.2378 1219.9917,-858.4175"/>
+<polygon fill="#191970" stroke="#191970" points="1220.2845,-854.9198 1209.8567,-856.7936 1219.177,-861.8317 1220.2845,-854.9198"/>
 </g>
-<!-- Node67&#45;&gt;Node16 -->
+<!-- Node58&#45;&gt;Node3 -->
 <g id="edge182" class="edge">
-<title>Node67&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M970.9842,-893.5429C745.6858,-891.1397 38,-876.1015 38,-782 38,-782 38,-782 38,-133 38,-42.1971 370.474,-21.0601 477.9334,-16.5999"/>
-<polygon fill="#191970" stroke="#191970" points="478.3313,-20.087 488.1862,-16.1972 478.0565,-13.0924 478.3313,-20.087"/>
+<title>Node58&#45;&gt;Node3</title>
+<path fill="none" stroke="#191970" d="M2148.1816,-951.454C2161.2031,-943.3582 2178.5993,-930.5886 2189.0432,-915 2206.0332,-889.6407 2208.0432,-879.5246 2208.0432,-849 2208.0432,-849 2208.0432,-849 2208.0432,-793 2208.0432,-758.9677 2193.4918,-721.5293 2183.4281,-699.7067"/>
+<polygon fill="#191970" stroke="#191970" points="2186.5371,-698.0954 2179.0601,-690.5891 2180.2242,-701.1198 2186.5371,-698.0954"/>
 </g>
-<!-- Node67&#45;&gt;Node18 -->
-<g id="edge183" class="edge">
-<title>Node67&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1107.0122,-891.6504C1263.4937,-886.0411 1661.0992,-870.6278 1993,-848 2275.6614,-828.7292 2348.301,-837.1366 2628,-792 2762.7668,-770.2519 2864.8468,-807.3539 2914,-680 2996.8029,-465.4613 2870.1906,-301.3269 2650,-235 2560.098,-207.9193 2271.7227,-194.2453 2172.7319,-190.2622"/>
-<polygon fill="#191970" stroke="#191970" points="2172.8145,-186.7628 2162.6841,-189.8652 2172.5381,-193.7573 2172.8145,-186.7628"/>
+<!-- Node58&#45;&gt;Node6 -->
+<g id="edge189" class="edge">
+<title>Node58&#45;&gt;Node6</title>
+<path fill="none" stroke="#191970" d="M2216.2005,-959.0746C2456.5635,-953.3946 3125.6168,-935.7964 3168.0432,-915 3202.5861,-898.068 3224.0432,-887.4695 3224.0432,-849 3224.0432,-849 3224.0432,-849 3224.0432,-563.5 3224.0432,-525.751 3201.6194,-486.4009 3186.5096,-464.1707"/>
+<polygon fill="#191970" stroke="#191970" points="3189.192,-461.9012 3180.5685,-455.7461 3183.4714,-465.9355 3189.192,-461.9012"/>
 </g>
-<!-- Node67&#45;&gt;Node34 -->
-<g id="edge181" class="edge">
-<title>Node67&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M970.9521,-889.3904C800.264,-877.1305 364.5853,-841.2618 323,-792 220.1661,-670.1834 388.2527,-804.3098 577,-400 607.4985,-334.67 615.883,-247.8955 618.1625,-209.1599"/>
-<polygon fill="#191970" stroke="#191970" points="621.679,-208.9226 618.6916,-198.7577 614.688,-208.567 621.679,-208.9226"/>
+<!-- Node58&#45;&gt;Node21 -->
+<g id="edge190" class="edge">
+<title>Node58&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M2111.5855,-951.349C2048.6074,-920.6093 1843.9182,-824.5772 1663.0432,-783 1574.7692,-762.7087 1318.1965,-813.8156 1257.0432,-747 1202.094,-686.9628 1290.49,-424.1404 1319.9921,-342.0728"/>
+<polygon fill="#191970" stroke="#191970" points="1323.3212,-343.1588 1323.4385,-332.5646 1316.7402,-340.7734 1323.3212,-343.1588"/>
 </g>
-<!-- Node68 -->
-<g id="node47" class="node">
-<title>Node68</title>
-<g id="a_node47"><a xlink:href="ir_2transform_8h.html" target="_top" xlink:title="tvm/ir/transform.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="707,-828.5 707,-847.5 813,-847.5 813,-828.5 707,-828.5"/>
-<text text-anchor="middle" x="760" y="-835.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/transform.h</text>
-</a>
+<!-- Node58&#45;&gt;Node10 -->
+<g id="edge192" class="edge">
+<title>Node58&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2216.145,-959.5211C2479.6943,-954.7123 3271.5026,-938.4628 3385.0432,-915 3532.5025,-884.528 3630.0432,-831.5749 3630.0432,-681 3630.0432,-681 3630.0432,-681 3630.0432,-625 3630.0432,-514.6164 3692.0432,-494.8836 3692.0432,-384.5 3692.0432,-384.5 3692.0432,-384.5 3692.0432,-250.5 3692.0432,-112.1412 3540.7207,-162.241 3408.0432,-123 3366.5115,-110.7165 3254.1709,-95.0943 3179.908,-85.5812"/>
+<polygon fill="#191970" stroke="#191970" points="3180.0553,-82.0717 3169.6932,-84.2803 3179.1709,-89.0156 3180.0553,-82.0717"/>
 </g>
+<!-- Node58&#45;&gt;Node33 -->
+<g id="edge191" class="edge">
+<title>Node58&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M2045.8339,-957.2554C1918.835,-951.192 1672.4827,-937.5723 1464.0432,-915 1125.9132,-878.3832 997.2206,-980.3973 707.0432,-803 537.4525,-699.3222 658.8133,-491.8915 815.0432,-369 874.6723,-322.0955 898.9932,-322.4779 972.0432,-302 1023.9333,-287.4538 1167.4231,-269.1879 1255.8861,-258.7792"/>
+<polygon fill="#191970" stroke="#191970" points="1256.3354,-262.2506 1265.8605,-257.6115 1255.5213,-255.2981 1256.3354,-262.2506"/>
 </g>
-<!-- Node67&#45;&gt;Node68 -->
-<g id="edge168" class="edge">
-<title>Node67&#45;&gt;Node68</title>
-<path fill="none" stroke="#191970" d="M991.3552,-884.4369C943.2053,-874.7724 868.7729,-859.8326 817.4007,-849.5213"/>
-<polygon fill="#191970" stroke="#191970" points="817.9415,-846.0601 807.4483,-847.5237 816.5639,-852.9232 817.9415,-846.0601"/>
+<!-- Node58&#45;&gt;Node41 -->
+<g id="edge193" class="edge">
+<title>Node58&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M2216.0666,-956.1249C2461.55,-941.4905 3154.0432,-896.0393 3154.0432,-849 3154.0432,-849 3154.0432,-849 3154.0432,-681 3154.0432,-538.1618 3073.5167,-500.446 2946.0432,-436 2873.451,-399.3 2639.2304,-388.7188 2521.6776,-385.6944"/>
+<polygon fill="#191970" stroke="#191970" points="2521.4435,-382.1877 2511.3604,-385.4407 2521.2713,-389.1856 2521.4435,-382.1877"/>
 </g>
-<!-- Node73 -->
-<g id="node48" class="node">
-<title>Node73</title>
-<g id="a_node48"><a xlink:href="attr__registry__map_8h.html" target="_top" xlink:title="Attribute map used in registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="990,-302.5 990,-332.5 1116,-332.5 1116,-302.5 990,-302.5"/>
-<text text-anchor="start" x="998" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/attr_registry</text>
-<text text-anchor="middle" x="1053" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_map.h</text>
+<!-- Node59 -->
+<g id="node46" class="node">
+<title>Node59</title>
+<g id="a_node46"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="tvm/meta_schedule/arg\l_info.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="3310.0432,-548.5 3310.0432,-578.5 3442.0432,-578.5 3442.0432,-548.5 3310.0432,-548.5"/>
+<text text-anchor="start" x="3318.0432" y="-566.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/arg</text>
+<text text-anchor="middle" x="3376.0432" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_info.h</text>
 </a>
 </g>
 </g>
-<!-- Node67&#45;&gt;Node73 -->
-<g id="edge175" class="edge">
-<title>Node67&#45;&gt;Node73</title>
-<path fill="none" stroke="#191970" d="M970.7003,-889.3424C885.5464,-882.7528 745.4109,-869.1071 698,-848 662.8559,-832.354 648.1623,-826.9096 632,-792 534.4843,-581.3714 882.8822,-395.9447 1009.0665,-336.9849"/>
-<polygon fill="#191970" stroke="#191970" points="1010.8588,-340.0122 1018.4644,-332.6361 1007.9191,-333.6594 1010.8588,-340.0122"/>
+<!-- Node58&#45;&gt;Node59 -->
+<g id="edge184" class="edge">
+<title>Node58&#45;&gt;Node59</title>
+<path fill="none" stroke="#191970" d="M2216.3222,-959.5697C2440.2188,-955.4759 3041.0696,-942.2182 3238.0432,-915 3363.8378,-897.6175 3516.0432,-975.9898 3516.0432,-849 3516.0432,-849 3516.0432,-849 3516.0432,-681 3516.0432,-633.4101 3467.9931,-601.2531 3428.1927,-582.78"/>
+<polygon fill="#191970" stroke="#191970" points="3429.3675,-579.4718 3418.8086,-578.6 3426.5192,-585.8661 3429.3675,-579.4718"/>
 </g>
-<!-- Node68&#45;&gt;Node19 -->
-<g id="edge170" class="edge">
-<title>Node68&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M773.8085,-828.1886C845.6514,-777.3087 1184.5913,-539.3149 1477,-369 1496.1993,-357.8173 1518.0856,-346.4019 1536.478,-337.1697"/>
-<polygon fill="#191970" stroke="#191970" points="1538.0814,-340.2812 1545.4711,-332.6889 1534.9597,-334.0158 1538.0814,-340.2812"/>
+<!-- Node58&#45;&gt;Node65 -->
+<g id="edge194" class="edge">
+<title>Node58&#45;&gt;Node65</title>
+<path fill="none" stroke="#191970" d="M2054.6222,-951.4686C1968.1983,-940.6897 1828.926,-923.3194 1747.3687,-913.1475"/>
+<polygon fill="#191970" stroke="#191970" points="1747.5858,-909.6475 1737.2295,-911.8829 1746.7194,-916.5937 1747.5858,-909.6475"/>
 </g>
-<!-- Node68&#45;&gt;Node16 -->
-<g id="edge174" class="edge">
-<title>Node68&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M706.6583,-834.3274C659.0001,-829.4269 588.2288,-818.085 532,-792 482.6348,-769.099 481.1658,-746.3551 436,-716 366.9571,-669.5976 328.2973,-686.1474 273,-624 216.3831,-560.3695 202,-531.1723 202,-446 202,-446 202,-446 202,-133 202,-101.7875 202.1354,-87.1172 226,-67 263.782,-35.1509 411.5594,-21.8491 478.1493,-17.389"/>
-<polygon fill="#191970" stroke="#191970" points="478.7278,-20.8591 488.4826,-16.7246 478.2787,-13.8735 478.7278,-20.8591"/>
+<!-- Node74 -->
+<g id="node50" class="node">
+<title>Node74</title>
+<g id="a_node50"><a xlink:href="trace_8h.html" target="_top" xlink:title="tvm/tir/schedule/trace.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="2046.5432,-895.5 2046.5432,-914.5 2179.5432,-914.5 2179.5432,-895.5 2046.5432,-895.5"/>
+<text text-anchor="middle" x="2113.0432" y="-902.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/schedule/trace.h</text>
+</a>
 </g>
-<!-- Node68&#45;&gt;Node14 -->
-<g id="edge173" class="edge">
-<title>Node68&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M813.487,-837.0421C1119.8869,-831.3348 2645.6293,-799.8735 2844,-736 3067.6903,-663.9738 3239,-619.5003 3239,-384.5 3239,-384.5 3239,-384.5 3239,-133 3239,-98.6999 3226.6163,-87.3429 3199,-67 3171.3721,-46.6485 3075.2694,-28.7743 3024.3076,-20.4556"/>
-<polygon fill="#191970" stroke="#191970" points="3024.5871,-16.9558 3014.159,-18.828 3023.4786,-23.8674 3024.5871,-16.9558"/>
 </g>
-<!-- Node68&#45;&gt;Node31 -->
-<g id="edge171" class="edge">
-<title>Node68&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M765.1012,-828.371C782.4141,-795.9176 841.0264,-687.8825 898,-604 1012.3089,-435.7024 1062.6023,-397.9535 1242,-302 1267.1714,-288.5367 1296.6048,-277.2556 1322.2032,-268.7165"/>
-<polygon fill="#191970" stroke="#191970" points="1323.3901,-272.0108 1331.8094,-265.5792 1321.2169,-265.3567 1323.3901,-272.0108"/>
+<!-- Node58&#45;&gt;Node74 -->
+<g id="edge209" class="edge">
+<title>Node58&#45;&gt;Node74</title>
+<path fill="none" stroke="#191970" d="M2127.9079,-951.2455C2125.5333,-943.8579 2122.194,-933.4689 2119.2818,-924.4087"/>
+<polygon fill="#191970" stroke="#191970" points="2122.5349,-923.0919 2116.1427,-914.6427 2115.8707,-925.234 2122.5349,-923.0919"/>
 </g>
-<!-- Node68&#45;&gt;Node59 -->
-<g id="edge169" class="edge">
-<title>Node68&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M813.1168,-834.8084C975.6658,-825.0415 1465.5266,-795.6078 1634.1949,-785.4733"/>
-<polygon fill="#191970" stroke="#191970" points="1634.5888,-788.956 1644.3608,-784.8624 1634.1689,-781.9686 1634.5888,-788.956"/>
+<!-- Node59&#45;&gt;Node5 -->
+<g id="edge185" class="edge">
+<title>Node59&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M3309.9325,-559.9051C3107.5515,-548.9003 2498.4877,-515.7815 2304.8417,-505.2516"/>
+<polygon fill="#191970" stroke="#191970" points="2305.0262,-501.7566 2294.8509,-504.7084 2304.6461,-508.7463 2305.0262,-501.7566"/>
 </g>
-<!-- Node68&#45;&gt;Node66 -->
-<g id="edge172" class="edge">
-<title>Node68&#45;&gt;Node66</title>
-<path fill="none" stroke="#191970" d="M706.9543,-830.036C639.721,-819.942 523.8955,-802.5527 451.5868,-791.6967"/>
-<polygon fill="#191970" stroke="#191970" points="452.091,-788.2332 441.6822,-790.2097 451.0517,-795.1557 452.091,-788.2332"/>
+<!-- Node59&#45;&gt;Node6 -->
+<g id="edge186" class="edge">
+<title>Node59&#45;&gt;Node6</title>
+<path fill="none" stroke="#191970" d="M3349.9201,-548.3795C3310.8277,-525.7521 3237.5602,-483.3435 3198.7262,-460.8657"/>
+<polygon fill="#191970" stroke="#191970" points="3200.0449,-457.585 3189.6367,-455.6046 3196.5382,-463.6433 3200.0449,-457.585"/>
 </g>
-<!-- Node73&#45;&gt;Node16 -->
-<g id="edge177" class="edge">
-<title>Node73&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1019.8614,-302.4244C997.3344,-292.1696 966.8607,-278.2842 940,-266 825.2057,-213.5014 791.4145,-209.302 684,-143 653.3025,-124.0519 570.2697,-60.9798 531.4823,-31.2553"/>
-<polygon fill="#191970" stroke="#191970" points="533.547,-28.4279 523.4828,-25.1168 529.2855,-33.9813 533.547,-28.4279"/>
+<!-- Node59&#45;&gt;Node10 -->
+<g id="edge188" class="edge">
+<title>Node59&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M3396.3591,-548.4551C3429.3711,-522.5385 3493.2626,-465.8662 3516.0432,-400 3540.0134,-330.6948 3537.5815,-305.0991 3516.0432,-235 3491.3973,-154.7868 3443.7532,-149.2279 3364.0432,-123 3303.382,-103.0399 3231.5522,-91.048 3179.7834,-84.338"/>
+<polygon fill="#191970" stroke="#191970" points="3180.0661,-80.846 3169.7064,-83.0656 3179.1891,-87.7909 3180.0661,-80.846"/>
 </g>
-<!-- Node73&#45;&gt;Node18 -->
-<g id="edge178" class="edge">
-<title>Node73&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1116.082,-310.0359C1318.9983,-286.026 1950.6377,-211.2878 2104.8034,-193.0463"/>
-<polygon fill="#191970" stroke="#191970" points="2105.5463,-196.4829 2115.0657,-191.832 2104.7237,-189.5314 2105.5463,-196.4829"/>
+<!-- Node59&#45;&gt;Node28 -->
+<g id="edge187" class="edge">
+<title>Node59&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M3368.2779,-548.4034C3341.6129,-496.5641 3253.8244,-325.8943 3224.6011,-269.0814"/>
+<polygon fill="#191970" stroke="#191970" points="3227.6831,-267.4213 3219.9966,-260.1297 3221.4583,-270.6233 3227.6831,-267.4213"/>
+</g>
+<!-- Node65&#45;&gt;Node1 -->
+<g id="edge196" class="edge">
+<title>Node65&#45;&gt;Node1</title>
+<path fill="none" stroke="#191970" d="M1626.8864,-899.06C1527.8262,-888.392 1320.7271,-866.089 1219.806,-855.2206"/>
+<polygon fill="#191970" stroke="#191970" points="1220.1097,-851.7332 1209.7924,-854.1422 1219.3602,-858.6929 1220.1097,-851.7332"/>
+</g>
+<!-- Node65&#45;&gt;Node3 -->
+<g id="edge195" class="edge">
+<title>Node65&#45;&gt;Node3</title>
+<path fill="none" stroke="#191970" d="M1703.1839,-895.375C1781.0902,-859.9054 2052.3472,-736.4063 2143.727,-694.8025"/>
+<polygon fill="#191970" stroke="#191970" points="2145.3305,-697.9182 2152.9814,-690.5891 2142.43,-691.5474 2145.3305,-697.9182"/>
+</g>
+<!-- Node65&#45;&gt;Node5 -->
+<g id="edge197" class="edge">
+<title>Node65&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M1737.1292,-903.1199C1982.512,-894.3892 2964.0432,-855.5568 2964.0432,-793 2964.0432,-793 2964.0432,-793 2964.0432,-625 2964.0432,-558.3326 2477.5738,-518.1192 2304.9277,-505.9469"/>
+<polygon fill="#191970" stroke="#191970" points="2304.9662,-502.4411 2294.7467,-505.236 2304.4786,-509.4241 2304.9662,-502.4411"/>
+</g>
+<!-- Node65&#45;&gt;Node20 -->
+<g id="edge208" class="edge">
+<title>Node65&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1685.5613,-895.2612C1688.7595,-886.0395 1693.3744,-871.7461 1696.0432,-859 1749.1219,-605.5042 1769.4468,-290.0468 1773.9995,-208.8015"/>
+<polygon fill="#191970" stroke="#191970" points="1777.5088,-208.725 1774.5588,-198.5492 1770.5192,-208.3437 1777.5088,-208.725"/>
+</g>
+<!-- Node65&#45;&gt;Node16 -->
+<g id="edge206" class="edge">
+<title>Node65&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1626.716,-902.1812C1415.8361,-891.1286 669.5067,-849.0835 574.0432,-803 367.5377,-703.3127 245.7019,-592.9365 295.0432,-369 315.0679,-278.1179 310.9473,-237.8449 383.0432,-179 466.8929,-110.5617 514.5562,-147.2291 620.0432,-123 731.4408,-97.4133 757.7761,-82.3183 871.0432,-67 1073.4694,-39.6238 1727.6702,-20.5395 1881.4593,-16.361"/>
+<polygon fill="#191970" stroke="#191970" points="1881.7523,-19.8545 1891.6542,-16.0861 1881.5635,-12.8571 1881.7523,-19.8545"/>
+</g>
+<!-- Node65&#45;&gt;Node57 -->
+<g id="edge207" class="edge">
+<title>Node65&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M1675.9898,-895.4509C1664.1479,-876.7707 1637.7467,-835.1238 1622.5022,-811.0761"/>
+<polygon fill="#191970" stroke="#191970" points="1625.3916,-809.0969 1617.0813,-802.5249 1619.4794,-812.8448 1625.3916,-809.0969"/>
 </g>
-<!-- Node73&#45;&gt;Node31 -->
-<g id="edge176" class="edge">
-<title>Node73&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1116.0276,-304.7891C1120.7553,-303.8429 1125.4529,-302.9049 1130,-302 1208.1222,-286.453 1230.1169,-282.9852 1311.5624,-266.2775"/>
-<polygon fill="#191970" stroke="#191970" points="1312.7074,-269.6154 1321.7973,-264.1727 1311.2972,-262.7589 1312.7074,-269.6154"/>
+<!-- Node66 -->
+<g id="node48" class="node">
+<title>Node66</title>
+<g id="a_node48"><a xlink:href="with_8h.html" target="_top" xlink:title="RAII wrapper function to enter and exit a context object similar to python&#39;s with syntax...">
+<polygon fill="#ffffff" stroke="#ff0000" points="48.5432,-671.5 48.5432,-690.5 157.5432,-690.5 157.5432,-671.5 48.5432,-671.5"/>
+<text text-anchor="middle" x="103.0432" y="-678.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/support/with.h</text>
+</a>
 </g>
-<!-- Node75 -->
-<g id="node50" class="node">
-<title>Node75</title>
-<g id="a_node50"><a xlink:href="instruction_8h.html" target="_top" xlink:title="tvm/tir/schedule/instruction.h">
-<polygon fill="#ffffff" stroke="#000000" points="306,-492.5 306,-511.5 466,-511.5 466,-492.5 306,-492.5"/>
-<text text-anchor="middle" x="386" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/schedule/instruction.h</text>
+</g>
+<!-- Node65&#45;&gt;Node66 -->
+<g id="edge198" class="edge">
+<title>Node65&#45;&gt;Node66</title>
+<path fill="none" stroke="#191970" d="M1626.6963,-903.7027C1417.429,-898.6592 681.1514,-879.6132 578.0432,-859 507.4457,-844.8863 493.4968,-828.1641 426.0432,-803 320.9233,-763.7842 196.8534,-716.6958 137.7372,-694.2102"/>
+<polygon fill="#191970" stroke="#191970" points="138.7368,-690.8458 128.1458,-690.5611 136.2476,-697.3883 138.7368,-690.8458"/>
+</g>
+<!-- Node67 -->
+<g id="node49" class="node">
+<title>Node67</title>
+<g id="a_node49"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1342.0432,-554 1342.0432,-573 1478.0432,-573 1478.0432,-554 1342.0432,-554"/>
+<text text-anchor="middle" x="1410.0432" y="-561" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target_kind.h</text>
 </a>
 </g>
 </g>
-<!-- Node74&#45;&gt;Node75 -->
-<g id="edge188" class="edge">
-<title>Node74&#45;&gt;Node75</title>
-<path fill="none" stroke="#191970" d="M147.3273,-660.4862C189.2372,-632.6567 311.8618,-551.2301 363.0933,-517.2108"/>
-<polygon fill="#191970" stroke="#191970" points="365.1218,-520.0652 371.5163,-511.6177 361.2495,-514.2338 365.1218,-520.0652"/>
+<!-- Node65&#45;&gt;Node67 -->
+<g id="edge200" class="edge">
+<title>Node65&#45;&gt;Node67</title>
+<path fill="none" stroke="#191970" d="M1682.6961,-895.4238C1683.7298,-873.8695 1683.82,-820.5008 1663.0432,-783 1614.3324,-695.0797 1567.9221,-700.9174 1492.0432,-635 1470.4298,-616.224 1445.6632,-594.6158 1429.0547,-580.1119"/>
+<polygon fill="#191970" stroke="#191970" points="1431.007,-577.1701 1421.173,-573.2275 1426.402,-582.4421 1431.007,-577.1701"/>
 </g>
-<!-- Node75&#45;&gt;Node4 -->
-<g id="edge189" class="edge">
-<title>Node75&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M466.0257,-499.3084C750.5788,-489.7379 1710.7615,-457.4435 1980.1795,-448.382"/>
-<polygon fill="#191970" stroke="#191970" points="1980.3502,-451.8783 1990.2269,-448.044 1980.1149,-444.8822 1980.3502,-451.8783"/>
+<!-- Node66&#45;&gt;Node18 -->
+<g id="edge199" class="edge">
+<title>Node66&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M85.102,-671.3281C55.5918,-653.962 .0432,-614.4995 .0432,-563.5 .0432,-563.5 .0432,-563.5 .0432,-133 .0432,-101.7875 -.6487,-86.0927 24.0432,-67 64.5343,-35.6909 423.3684,-20.57 535.0291,-16.5942"/>
+<polygon fill="#191970" stroke="#191970" points="535.4692,-20.0811 545.3408,-16.2339 535.2247,-13.0853 535.4692,-20.0811"/>
 </g>
-<!-- Node75&#45;&gt;Node16 -->
-<g id="edge190" class="edge">
-<title>Node75&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M375.3685,-492.4806C355.7862,-473.8398 316,-430.3277 316,-384.5 316,-384.5 316,-384.5 316,-133 316,-101.7875 317.3176,-88.4413 340,-67 360.0247,-48.071 434.6812,-30.5999 478.4499,-21.6879"/>
-<polygon fill="#191970" stroke="#191970" points="479.2161,-25.104 488.3369,-19.7131 477.845,-18.2396 479.2161,-25.104"/>
+<!-- Node67&#45;&gt;Node5 -->
+<g id="edge201" class="edge">
+<title>Node67&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M1478.3748,-558.4672C1638.2892,-546.6891 2034.7228,-517.4907 2185.5232,-506.3838"/>
+<polygon fill="#191970" stroke="#191970" points="2185.8073,-509.8725 2195.5232,-505.6473 2185.2931,-502.8914 2185.8073,-509.8725"/>
+</g>
+<!-- Node67&#45;&gt;Node23 -->
+<g id="edge202" class="edge">
+<title>Node67&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M1421.259,-553.8663C1480.04,-503.3766 1752.7078,-269.17 1827.1961,-205.1887"/>
+<polygon fill="#191970" stroke="#191970" points="1829.5964,-207.7409 1834.9017,-198.57 1825.0353,-202.4308 1829.5964,-207.7409"/>
+</g>
+<!-- Node67&#45;&gt;Node18 -->
+<g id="edge204" class="edge">
+<title>Node67&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1349.2304,-553.9519C1176.617,-526.5731 686.0546,-446.7686 530.0432,-400 456.5884,-377.9799 436.9959,-372.1247 371.0432,-333 282.632,-280.5524 190.0432,-291.7974 190.0432,-189 190.0432,-189 190.0432,-189 190.0432,-133 190.0432,-60.9455 443.6714,-28.0956 535.4188,-18.5696"/>
+<polygon fill="#191970" stroke="#191970" points="535.9327,-22.0356 545.5296,-17.5468 535.2281,-15.0711 535.9327,-22.0356"/>
+</g>
+<!-- Node67&#45;&gt;Node20 -->
+<g id="edge205" class="edge">
+<title>Node67&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1411.6464,-553.91C1415.6642,-531.7803 1427.6365,-475.9834 1452.0432,-436 1517.378,-328.9681 1544.659,-302.9603 1650.0432,-235 1678.5227,-216.6342 1715.3583,-204.2212 1741.6504,-196.9642"/>
+<polygon fill="#191970" stroke="#191970" points="1742.7517,-200.2932 1751.5192,-194.3449 1740.956,-193.5274 1742.7517,-200.2932"/>
+</g>
+<!-- Node67&#45;&gt;Node36 -->
+<g id="edge203" class="edge">
+<title>Node67&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M1373.9828,-553.9472C1321.9146,-540.1704 1222.6374,-513.9655 1138.0432,-492 862.7013,-420.5055 769.4708,-466.0744 518.0432,-333 478.2445,-311.9355 460.574,-307.0398 442.0432,-266 433.668,-247.4514 439.5853,-224.1793 445.913,-208.1014"/>
+<polygon fill="#191970" stroke="#191970" points="449.2234,-209.2643 449.9843,-198.6968 442.7995,-206.4833 449.2234,-209.2643"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/apply__history__best_8h_source.html b/docs/reference/api/doxygen/apply__history__best_8h_source.html
index a5cf5f3ef..067b33c0e 100644
--- a/docs/reference/api/doxygen/apply__history__best_8h_source.html
+++ b/docs/reference/api/doxygen/apply__history__best_8h_source.html
@@ -66,29 +66,36 @@ $(function() {
 <div class="title">apply_history_best.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="apply__history__best_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment">  [...]
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a928dee9281dff37dffb2a06bb3343ceb"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a928dee9281dff37dffb2a06bb3343ceb">tvm::meta_schedule::ApplyHistoryBestNode::logging_func</a></div><div class="ttdeci">PackedFunc logging_func</div><div class="ttdoc">The logging function to be used. </div><div class="ttdef"><b>Definition:</b> apply_history_best.h:37</div></div>
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_ae4c80b6dfe62636442a96bafb6887aa4"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#ae4c80b6dfe62636442a96bafb6887aa4">tvm::meta_schedule::ApplyHistoryBestNode::database</a></div><div class="ttdeci">Database database</div><div class="ttdoc">The database to be queried from. </div><div class="ttdef"><b>Definition:</b> apply_history_best.h:35</div></div>
+<a href="apply__history__best_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment">  [...]
+<div class="ttc" id="object_8h_html_a98fa3013ab23958a9f05200330e35805"><div class="ttname"><a href="object_8h.html#a98fa3013ab23958a9f05200330e35805">TVM_DEFINE_MUTABLE_NOTNULLABLE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_MUTABLE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:758</div></div>
+<div class="ttc" id="string_8h_html"><div class="ttname"><a href="string_8h.html">string.h</a></div><div class="ttdoc">Runtime String container types. </div></div>
+<div class="ttc" id="ir_2module_8h_html"><div class="ttname"><a href="ir_2module_8h.html">module.h</a></div><div class="ttdoc">IRModule that holds the functions and type definitions. </div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a928dee9281dff37dffb2a06bb3343ceb"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a928dee9281dff37dffb2a06bb3343ceb">tvm::meta_schedule::ApplyHistoryBestNode::logging_func</a></div><div class="ttdeci">PackedFunc logging_func</div><div class="ttdoc">The logging function to be used. </div><div class="ttdef"><b>Definition:</b> apply_history_best.h:44</div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_ae4c80b6dfe62636442a96bafb6887aa4"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#ae4c80b6dfe62636442a96bafb6887aa4">tvm::meta_schedule::ApplyHistoryBestNode::database</a></div><div class="ttdeci">Database database</div><div class="ttdoc">The database to be queried from. </div><div class="ttdef"><b>Definition:</b> apply_history_best.h:42</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBest_html"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBest.html">tvm::meta_schedule::ApplyHistoryBest</a></div><div class="ttdoc">Managed reference to ApplyHistoryBestNode. </div><div class="ttdef"><b>Definition:</b> apply_history_best.h:58</div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBest_html"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBest.html">tvm::meta_schedule::ApplyHistoryBest</a></div><div class="ttdoc">Managed reference to ApplyHistoryBestNode. </div><div class="ttdef"><b>Definition:</b> apply_history_best.h:65</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:167</div></div>
+<div class="ttc" id="array_8h_html"><div class="ttname"><a href="array_8h.html">array.h</a></div><div class="ttdoc">Runtime Array container types. </div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
 <div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a86fc3705d9a37c98f75a9a843878178a"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a86fc3705d9a37c98f75a9a843878178a">tvm::meta_schedule::ApplyHistoryBestNode::Query</a></div><div class="ttdeci">Optional&lt; IRModule &gt; Query(runtime::String task_name, IRModule mod, Target target, Optional&lt; Array&lt; IRModule &gt;&gt; dispatched)</div><div class="ttdoc">Query the best en [...]
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1Database_html"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1Database.html">tvm::meta_schedule::Database</a></div><div class="ttdoc">Managed reference to DatabaseNode. </div><div class="ttdef"><b>Definition:</b> database.h:281</div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1Database_html"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1Database.html">tvm::meta_schedule::Database</a></div><div class="ttdoc">Managed reference to DatabaseNode. </div><div class="ttdef"><b>Definition:</b> database.h:288</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a6be4c52d4ff271c11d2f2daf53861778"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a6be4c52d4ff271c11d2f2daf53861778">tvm::meta_schedule::ApplyHistoryBestNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> apply_history_best.h:46</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:129</div></div>
 <div class="ttc" id="classtvm_1_1Target_html"><div class="ttname"><a href="classtvm_1_1Target.html">tvm::Target</a></div><div class="ttdoc">Managed reference class to TargetNode. </div><div class="ttdef"><b>Definition:</b> target.h:141</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a755012568d85aa7cba250c5f8be766cc"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a755012568d85aa7cba250c5f8be766cc">tvm::meta_schedule::ApplyHistoryBestNode::_type_key</a></div><div class="ttdeci">static constexpr const char * _type_key</div><div class="ttdef"><b>Definition:</b> apply_history_best.h:50</div></div>
+<div class="ttc" id="object_8h_html"><div class="ttname"><a href="object_8h.html">object.h</a></div><div class="ttdoc">A managed object in the TVM runtime. </div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a755012568d85aa7cba250c5f8be766cc"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a755012568d85aa7cba250c5f8be766cc">tvm::meta_schedule::ApplyHistoryBestNode::_type_key</a></div><div class="ttdeci">static constexpr const char * _type_key</div><div class="ttdef"><b>Definition:</b> apply_history_best.h:57</div></div>
 <div class="ttc" id="classtvm_1_1IRModule_html"><div class="ttname"><a href="classtvm_1_1IRModule.html">tvm::IRModule</a></div><div class="ttdoc">Managed reference class to IRModuleNode. </div><div class="ttdef"><b>Definition:</b> module.h:360</div></div>
 <div class="ttc" id="target_8h_html"><div class="ttname"><a href="target_8h.html">target.h</a></div><div class="ttdoc">Compilation target object. </div></div>
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html">tvm::meta_schedule::ApplyHistoryBestNode</a></div><div class="ttdoc">An integration context that allows application of historically best records from a database...</div><div class="ttdef"><b>Definition:</b> apply_history_best.h:32</div></div>
+<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html">tvm::meta_schedule::ApplyHistoryBestNode</a></div><div class="ttdoc">An integration context that allows application of historically best records from a database...</div><div class="ttdef"><b>Definition:</b> apply_history_best.h:39</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html">tvm::runtime::PackedFunc</a></div><div class="ttdoc">Packed function is a type-erased function. The arguments are passed by packed format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:138</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Optional_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Optional.html">tvm::runtime::Optional</a></div><div class="ttdoc">Optional container that to represent to a Nullable variant of T. </div><div class="ttdef"><b>Definition:</b> optional.h:51</div></div>
 <div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a124bdf490b05d2534053b09299db18dd"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a124bdf490b05d2534053b09299db18dd">tvm::meta_schedule::ApplyHistoryBestNode::TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_FINAL_OBJECT_INFO(ApplyHistoryBestNode, runtime::Object)</div></div>
 <div class="ttc" id="database_8h_html"><div class="ttname"><a href="database_8h.html">database.h</a></div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_aaa95d3ad68932ab206efbe0a326db6a2"><div class="ttname"><a href="namespacetvm_1_1topi.html#aaa95d3ad68932ab206efbe0a326db6a2">tvm::topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:290</div></div>
-<div class="ttc" id="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode_html_a1ef1c1f1d65ff784abf4c7e064d54637"><div class="ttname"><a href="classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#a1ef1c1f1d65ff784abf4c7e064d54637">tvm::meta_schedule::ApplyHistoryBestNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> apply_history_best.h:39</div></div>
+<div class="ttc" id="reflection_8h_html"><div class="ttname"><a href="reflection_8h.html">reflection.h</a></div><div class="ttdoc">Reflection and serialization of compiler IR/AST nodes. </div></div>
+<div class="ttc" id="packed__func_8h_html"><div class="ttname"><a href="packed__func_8h.html">packed_func.h</a></div><div class="ttdoc">Type-erased function used across TVM API. </div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/arg__info_8h.html b/docs/reference/api/doxygen/arg__info_8h.html
index fb6635fad..e60a294e5 100644
--- a/docs/reference/api/doxygen/arg__info_8h.html
+++ b/docs/reference/api/doxygen/arg__info_8h.html
@@ -70,18 +70,21 @@ $(function() {
 </div><!--header-->
 <div class="contents">
 <div class="textblock"><code>#include &lt;<a class="el" href="node_8h_source.html">tvm/node/node.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="reflection_8h_source.html">tvm/node/reflection.h</a>&gt;</code><br />
 <code>#include &lt;<a class="el" href="shape__tuple_8h_source.html">tvm/runtime/container/shape_tuple.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="data__type_8h_source.html">tvm/runtime/data_type.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="object_8h_source.html">tvm/runtime/object.h</a>&gt;</code><br />
 <code>#include &lt;<a class="el" href="tir_2function_8h_source.html">tvm/tir/function.h</a>&gt;</code><br />
 </div><div class="textblock"><div class="dynheader">
 Include dependency graph for arg_info.h:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="arg__info_8h__incl.svg" width="4715" height="1380"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="arg__info_8h__incl.svg" width="4715" height="1306"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div><div class="textblock"><div class="dynheader">
 This graph shows which files directly or indirectly include this file:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="arg__info_8h__dep__incl.svg" width="914" height="499"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="arg__info_8h__dep__incl.svg" width="1028" height="588"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg b/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg
index 7c4174383..467e5f121 100644
--- a/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg
@@ -4,209 +4,261 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/meta_schedule/arg_info.h Pages: 1 -->
-<svg width="685pt" height="374pt"
- viewBox="0.00 0.00 685.00 374.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 370)">
+<svg width="771pt" height="441pt"
+ viewBox="0.00 0.00 770.69 441.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 437)">
 <title>include/tvm/meta_schedule/arg_info.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-370 681,-370 681,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-437 766.6884,-437 766.6884,4 -4,4"/>
 <!-- Node56 -->
 <g id="node1" class="node">
 <title>Node56</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="276,-335.5 276,-365.5 428,-365.5 428,-335.5 276,-335.5"/>
-<text text-anchor="start" x="284" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="352" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="312.6884,-402.5 312.6884,-432.5 464.6884,-432.5 464.6884,-402.5 312.6884,-402.5"/>
+<text text-anchor="start" x="320.6884" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="388.6884" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
 </g>
 <!-- Node57 -->
 <g id="node2" class="node">
 <title>Node57</title>
-<g id="a_node2"><a xlink:href="database_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/database.h">
-<polygon fill="#ffffff" stroke="#000000" points="104,-268.5 104,-298.5 256,-298.5 256,-268.5 104,-268.5"/>
-<text text-anchor="start" x="112" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="180" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
+<g id="a_node2"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
+<polygon fill="#ffffff" stroke="#000000" points="468.6884,-268.5 468.6884,-298.5 620.6884,-298.5 620.6884,-268.5 468.6884,-268.5"/>
+<text text-anchor="start" x="476.6884" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="544.6884" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
 </a>
 </g>
 </g>
 <!-- Node56&#45;&gt;Node57 -->
 <g id="edge1" class="edge">
 <title>Node56&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M303.8078,-331.7275C276.9477,-321.2645 243.9441,-308.4084 218.7685,-298.6017"/>
-<polygon fill="#191970" stroke="#191970" points="302.8114,-335.0954 313.3998,-335.4639 305.3522,-328.5728 302.8114,-335.0954"/>
+<path fill="none" stroke="#191970" d="M427.6596,-397.3475C442.7565,-388.6773 459.7288,-377.8167 473.6884,-366 497.9192,-345.4888 520.9242,-316.3688 533.877,-298.7724"/>
+<polygon fill="#191970" stroke="#191970" points="425.5603,-394.5117 418.5458,-402.4519 428.9809,-400.6191 425.5603,-394.5117"/>
 </g>
-<!-- Node60 -->
-<g id="node5" class="node">
-<title>Node60</title>
-<g id="a_node5"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
-<polygon fill="#ffffff" stroke="#000000" points="276,-268.5 276,-298.5 428,-298.5 428,-268.5 276,-268.5"/>
-<text text-anchor="start" x="284" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="352" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/runner.h</text>
+<!-- Node58 -->
+<g id="node3" class="node">
+<title>Node58</title>
+<g id="a_node3"><a xlink:href="search__strategy_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/search_strategy.h">
+<polygon fill="#ffffff" stroke="#000000" points="364.6884,-201.5 364.6884,-231.5 516.6884,-231.5 516.6884,-201.5 364.6884,-201.5"/>
+<text text-anchor="start" x="372.6884" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="440.6884" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_strategy.h</text>
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node60 -->
-<g id="edge4" class="edge">
-<title>Node56&#45;&gt;Node60</title>
-<path fill="none" stroke="#191970" d="M352,-325.0249C352,-316.128 352,-306.4287 352,-298.6432"/>
-<polygon fill="#191970" stroke="#191970" points="348.5001,-325.2966 352,-335.2967 355.5001,-325.2967 348.5001,-325.2966"/>
+<!-- Node56&#45;&gt;Node58 -->
+<g id="edge24" class="edge">
+<title>Node56&#45;&gt;Node58</title>
+<path fill="none" stroke="#191970" d="M346.6858,-397.6659C293.7244,-369.9566 213.8854,-317.4821 251.6884,-268 265.7536,-249.5894 318.3842,-236.1879 364.5449,-227.6786"/>
+<polygon fill="#191970" stroke="#191970" points="345.407,-400.9431 355.9035,-402.3832 348.596,-394.7117 345.407,-400.9431"/>
 </g>
 <!-- Node62 -->
 <g id="node7" class="node">
 <title>Node62</title>
-<g id="a_node7"><a xlink:href="search__strategy_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/search_strategy.h">
-<polygon fill="#ffffff" stroke="#000000" points="387,-201.5 387,-231.5 539,-231.5 539,-201.5 387,-201.5"/>
-<text text-anchor="start" x="395" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="463" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_strategy.h</text>
+<g id="a_node7"><a xlink:href="database_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/database.h">
+<polygon fill="#ffffff" stroke="#000000" points="14.6884,-268.5 14.6884,-298.5 166.6884,-298.5 166.6884,-268.5 14.6884,-268.5"/>
+<text text-anchor="start" x="22.6884" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="90.6884" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
 </a>
 </g>
 </g>
 <!-- Node56&#45;&gt;Node62 -->
-<g id="edge17" class="edge">
+<g id="edge9" class="edge">
 <title>Node56&#45;&gt;Node62</title>
-<path fill="none" stroke="#191970" d="M398.1277,-330.5557C412.2666,-322.5215 426.7726,-312.0244 437,-299 452.6741,-279.0392 458.9845,-249.4789 461.4633,-231.6925"/>
-<polygon fill="#191970" stroke="#191970" points="396.0983,-327.6724 388.9381,-335.4815 399.4053,-333.842 396.0983,-327.6724"/>
+<path fill="none" stroke="#191970" d="M314.4739,-399.8525C284.0771,-391.3952 248.9942,-380.0215 218.6884,-366 177.8979,-347.1276 134.837,-316.9393 110.5088,-298.7864"/>
+<polygon fill="#191970" stroke="#191970" points="313.5978,-403.2414 324.1663,-402.4956 315.4394,-396.4879 313.5978,-403.2414"/>
 </g>
-<!-- Node58 -->
-<g id="node3" class="node">
-<title>Node58</title>
-<g id="a_node3"><a xlink:href="apply__history__best_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/apply_history_best.h">
-<polygon fill="#ffffff" stroke="#000000" points="0,-201.5 0,-231.5 152,-231.5 152,-201.5 0,-201.5"/>
-<text text-anchor="start" x="8" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="76" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/apply_history_best.h</text>
+<!-- Node64 -->
+<g id="node9" class="node">
+<title>Node64</title>
+<g id="a_node9"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
+<polygon fill="#ffffff" stroke="#000000" points="312.6884,-335.5 312.6884,-365.5 464.6884,-365.5 464.6884,-335.5 312.6884,-335.5"/>
+<text text-anchor="start" x="320.6884" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="388.6884" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_candidate.h</text>
+</a>
+</g>
+</g>
+<!-- Node56&#45;&gt;Node64 -->
+<g id="edge13" class="edge">
+<title>Node56&#45;&gt;Node64</title>
+<path fill="none" stroke="#191970" d="M388.6884,-392.0249C388.6884,-383.128 388.6884,-373.4287 388.6884,-365.6432"/>
+<polygon fill="#191970" stroke="#191970" points="385.1885,-392.2966 388.6884,-402.2967 392.1885,-392.2967 385.1885,-392.2966"/>
+</g>
+<!-- Node66 -->
+<g id="node11" class="node">
+<title>Node66</title>
+<g id="a_node11"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
+<polygon fill="#ffffff" stroke="#000000" points="585.6884,-335.5 585.6884,-365.5 737.6884,-365.5 737.6884,-335.5 585.6884,-335.5"/>
+<text text-anchor="start" x="593.6884" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="661.6884" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/runner.h</text>
 </a>
 </g>
 </g>
+<!-- Node56&#45;&gt;Node66 -->
+<g id="edge18" class="edge">
+<title>Node56&#45;&gt;Node66</title>
+<path fill="none" stroke="#191970" d="M459.9028,-400.0225C503.4253,-389.3411 558.5309,-375.8171 600.1547,-365.6017"/>
+<polygon fill="#191970" stroke="#191970" points="458.8325,-396.6812 449.9549,-402.4639 460.501,-403.4795 458.8325,-396.6812"/>
+</g>
 <!-- Node57&#45;&gt;Node58 -->
 <g id="edge2" class="edge">
 <title>Node57&#45;&gt;Node58</title>
-<path fill="none" stroke="#191970" d="M147.861,-262.7951C132.2777,-252.7558 113.8939,-240.9124 99.6216,-231.7177"/>
-<polygon fill="#191970" stroke="#191970" points="146.3583,-265.9904 156.6604,-268.4639 150.1494,-260.1058 146.3583,-265.9904"/>
+<path fill="none" stroke="#191970" d="M512.5494,-262.7951C496.9661,-252.7558 478.5823,-240.9124 464.31,-231.7177"/>
+<polygon fill="#191970" stroke="#191970" points="511.0467,-265.9904 521.3488,-268.4639 514.8378,-260.1058 511.0467,-265.9904"/>
+</g>
+<!-- Node60 -->
+<g id="node5" class="node">
+<title>Node60</title>
+<g id="a_node5"><a xlink:href="task__scheduler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/task_scheduler.h">
+<polygon fill="#ffffff" stroke="#000000" points="506.6884,-.5 506.6884,-30.5 658.6884,-30.5 658.6884,-.5 506.6884,-.5"/>
+<text text-anchor="start" x="514.6884" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="582.6884" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/task_scheduler.h</text>
+</a>
+</g>
+</g>
+<!-- Node57&#45;&gt;Node60 -->
+<g id="edge8" class="edge">
+<title>Node57&#45;&gt;Node60</title>
+<path fill="none" stroke="#191970" d="M578.254,-262.7238C608.068,-242.0992 649.6313,-207.4702 667.6884,-165 689.6508,-113.3445 633.5149,-56.9316 602.1431,-30.607"/>
+<polygon fill="#191970" stroke="#191970" points="576.1413,-259.9269 569.8024,-268.4162 580.0518,-265.7328 576.1413,-259.9269"/>
 </g>
 <!-- Node59 -->
 <g id="node4" class="node">
 <title>Node59</title>
-<g id="a_node4"><a xlink:href="task__scheduler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/task_scheduler.h">
-<polygon fill="#ffffff" stroke="#000000" points="180,-.5 180,-30.5 332,-30.5 332,-.5 180,-.5"/>
-<text text-anchor="start" x="188" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="256" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/task_scheduler.h</text>
+<g id="a_node4"><a xlink:href="measure__callback_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_callback.h">
+<polygon fill="#ffffff" stroke="#000000" points="402.6884,-67.5 402.6884,-97.5 554.6884,-97.5 554.6884,-67.5 402.6884,-67.5"/>
+<text text-anchor="start" x="410.6884" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="478.6884" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_callback.h</text>
 </a>
 </g>
 </g>
-<!-- Node57&#45;&gt;Node59 -->
+<!-- Node58&#45;&gt;Node59 -->
 <g id="edge3" class="edge">
-<title>Node57&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M177.5514,-258.2307C174.5924,-216.2411 173.3274,-130.4459 204,-67 210.9682,-52.5865 223.674,-39.8855 234.7814,-30.7029"/>
-<polygon fill="#191970" stroke="#191970" points="174.0777,-258.7015 178.3636,-268.3908 181.0555,-258.1436 174.0777,-258.7015"/>
-</g>
-<!-- Node60&#45;&gt;Node59 -->
-<g id="edge15" class="edge">
-<title>Node60&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M308.8902,-263.1998C296.1402,-255.1881 283.369,-244.7944 275,-232 232.8652,-167.5848 245.8176,-68.3517 252.8179,-30.696"/>
-<polygon fill="#191970" stroke="#191970" points="307.2668,-266.3057 317.6593,-268.3661 310.82,-260.2745 307.2668,-266.3057"/>
+<title>Node58&#45;&gt;Node59</title>
+<path fill="none" stroke="#191970" d="M447.7195,-191.706C455.5208,-164.1962 467.8634,-120.6723 474.3606,-97.7614"/>
+<polygon fill="#191970" stroke="#191970" points="444.3347,-190.8134 444.9736,-201.389 451.0692,-192.7232 444.3347,-190.8134"/>
 </g>
 <!-- Node61 -->
 <g id="node6" class="node">
 <title>Node61</title>
-<g id="a_node6"><a xlink:href="measure__callback_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_callback.h">
-<polygon fill="#ffffff" stroke="#000000" points="284,-67.5 284,-97.5 436,-97.5 436,-67.5 284,-67.5"/>
-<text text-anchor="start" x="292" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="360" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_callback.h</text>
+<g id="a_node6"><a xlink:href="tune__context_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/tune_context.h">
+<polygon fill="#ffffff" stroke="#000000" points="506.6884,-134.5 506.6884,-164.5 658.6884,-164.5 658.6884,-134.5 506.6884,-134.5"/>
+<text text-anchor="start" x="514.6884" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="582.6884" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
 </a>
 </g>
 </g>
-<!-- Node60&#45;&gt;Node61 -->
+<!-- Node58&#45;&gt;Node61 -->
 <g id="edge5" class="edge">
-<title>Node60&#45;&gt;Node61</title>
-<path fill="none" stroke="#191970" d="M326.3038,-261.5186C295.194,-232.1261 249.9008,-178.5538 275,-134 284.3156,-117.4638 301.3193,-105.6299 317.6841,-97.5103"/>
-<polygon fill="#191970" stroke="#191970" points="324.122,-264.2668 333.8561,-268.4501 328.8553,-259.1097 324.122,-264.2668"/>
-</g>
-<!-- Node60&#45;&gt;Node62 -->
-<g id="edge7" class="edge">
-<title>Node60&#45;&gt;Node62</title>
-<path fill="none" stroke="#191970" d="M385.8366,-263.0761C402.5672,-252.9774 422.4132,-240.9983 437.7885,-231.7177"/>
-<polygon fill="#191970" stroke="#191970" points="383.6632,-260.2998 376.9106,-268.4639 387.2806,-266.2927 383.6632,-260.2998"/>
+<title>Node58&#45;&gt;Node61</title>
+<path fill="none" stroke="#191970" d="M481.6137,-197.1902C503.4895,-186.8685 530.0381,-174.3421 550.4359,-164.7177"/>
+<polygon fill="#191970" stroke="#191970" points="480.1063,-194.0313 472.556,-201.4639 483.0934,-200.362 480.1063,-194.0313"/>
 </g>
-<!-- Node65 -->
-<g id="node10" class="node">
-<title>Node65</title>
-<g id="a_node10"><a xlink:href="tune__context_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/tune_context.h">
-<polygon fill="#ffffff" stroke="#000000" points="284,-134.5 284,-164.5 436,-164.5 436,-134.5 284,-134.5"/>
-<text text-anchor="start" x="292" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="360" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
-</a>
-</g>
-</g>
-<!-- Node60&#45;&gt;Node65 -->
-<g id="edge16" class="edge">
-<title>Node60&#45;&gt;Node65</title>
-<path fill="none" stroke="#191970" d="M353.502,-258.3415C355.1455,-230.8131 357.7271,-187.5714 359.0889,-164.7614"/>
-<polygon fill="#191970" stroke="#191970" points="350.0044,-258.1981 352.9022,-268.389 356.992,-258.6154 350.0044,-258.1981"/>
+<!-- Node59&#45;&gt;Node60 -->
+<g id="edge4" class="edge">
+<title>Node59&#45;&gt;Node60</title>
+<path fill="none" stroke="#191970" d="M510.8274,-61.7951C526.4107,-51.7558 544.7945,-39.9124 559.0669,-30.7177"/>
+<polygon fill="#191970" stroke="#191970" points="508.5391,-59.1058 502.028,-67.4639 512.3301,-64.9904 508.5391,-59.1058"/>
 </g>
 <!-- Node61&#45;&gt;Node59 -->
 <g id="edge6" class="edge">
 <title>Node61&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M327.861,-61.7951C312.2777,-51.7558 293.8939,-39.9124 279.6216,-30.7177"/>
-<polygon fill="#191970" stroke="#191970" points="326.3583,-64.9904 336.6604,-67.4639 330.1494,-59.1058 326.3583,-64.9904"/>
+<path fill="none" stroke="#191970" d="M550.5494,-128.7951C534.9661,-118.7558 516.5823,-106.9124 502.31,-97.7177"/>
+<polygon fill="#191970" stroke="#191970" points="549.0467,-131.9904 559.3488,-134.4639 552.8378,-126.1058 549.0467,-131.9904"/>
 </g>
-<!-- Node62&#45;&gt;Node61 -->
+<!-- Node61&#45;&gt;Node60 -->
+<g id="edge7" class="edge">
+<title>Node61&#45;&gt;Node60</title>
+<path fill="none" stroke="#191970" d="M582.6884,-124.3415C582.6884,-96.8131 582.6884,-53.5714 582.6884,-30.7614"/>
+<polygon fill="#191970" stroke="#191970" points="579.1885,-124.3889 582.6884,-134.389 586.1885,-124.389 579.1885,-124.3889"/>
+</g>
+<!-- Node62&#45;&gt;Node58 -->
 <g id="edge11" class="edge">
-<title>Node62&#45;&gt;Node61</title>
-<path fill="none" stroke="#191970" d="M462.6104,-191.302C461.1942,-173.7911 456.9284,-150.6931 445,-134 433.5641,-117.9962 415.4575,-106.0331 398.8891,-97.6857"/>
-<polygon fill="#191970" stroke="#191970" points="459.1206,-191.5841 463.1671,-201.3758 466.1099,-191.1979 459.1206,-191.5841"/>
+<title>Node62&#45;&gt;Node58</title>
+<path fill="none" stroke="#191970" d="M177.2131,-266.9367C234.6328,-255.9449 309.2965,-241.6522 364.4747,-231.0895"/>
+<polygon fill="#191970" stroke="#191970" points="176.2285,-263.5616 167.0649,-268.8794 177.5446,-270.4367 176.2285,-263.5616"/>
+</g>
+<!-- Node62&#45;&gt;Node60 -->
+<g id="edge12" class="edge">
+<title>Node62&#45;&gt;Node60</title>
+<path fill="none" stroke="#191970" d="M39.5373,-263.7472C26.1452,-255.9655 13.3301,-245.5649 5.6884,-232 -1.074,-219.996 -2.3072,-212.2204 5.6884,-201 64.7295,-118.1466 363.8026,-54.6197 506.5932,-28.4969"/>
+<polygon fill="#191970" stroke="#191970" points="37.9063,-266.8441 48.3725,-268.4897 41.2169,-260.6764 37.9063,-266.8441"/>
 </g>
 <!-- Node63 -->
 <g id="node8" class="node">
 <title>Node63</title>
-<g id="a_node8"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
-<polygon fill="#ffffff" stroke="#000000" points="525,-67.5 525,-97.5 677,-97.5 677,-67.5 525,-67.5"/>
-<text text-anchor="start" x="533" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="601" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
+<g id="a_node8"><a xlink:href="apply__history__best_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/apply_history_best.h">
+<polygon fill="#ffffff" stroke="#000000" points="14.6884,-201.5 14.6884,-231.5 166.6884,-231.5 166.6884,-201.5 14.6884,-201.5"/>
+<text text-anchor="start" x="22.6884" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="90.6884" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/apply_history_best.h</text>
 </a>
 </g>
 </g>
 <!-- Node62&#45;&gt;Node63 -->
-<g id="edge8" class="edge">
+<g id="edge10" class="edge">
 <title>Node62&#45;&gt;Node63</title>
-<path fill="none" stroke="#191970" d="M548.9286,-203.6055C592.8504,-195.0729 639.7996,-182.2547 653,-165 670.0769,-142.6782 643.1427,-114.6161 622.0939,-97.6438"/>
-<polygon fill="#191970" stroke="#191970" points="548.2113,-200.179 539.0311,-205.468 549.5059,-207.0582 548.2113,-200.179"/>
-</g>
-<!-- Node64 -->
-<g id="node9" class="node">
-<title>Node64</title>
-<g id="a_node9"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
-<polygon fill="#ffffff" stroke="#000000" points="492,-134.5 492,-164.5 644,-164.5 644,-134.5 492,-134.5"/>
-<text text-anchor="start" x="500" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="568" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/feature_extractor.h</text>
-</a>
+<path fill="none" stroke="#191970" d="M90.6884,-258.0249C90.6884,-249.128 90.6884,-239.4287 90.6884,-231.6432"/>
+<polygon fill="#191970" stroke="#191970" points="87.1885,-258.2966 90.6884,-268.2967 94.1885,-258.2967 87.1885,-258.2966"/>
 </g>
+<!-- Node64&#45;&gt;Node57 -->
+<g id="edge14" class="edge">
+<title>Node64&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M433.04,-331.4516C457.2837,-321.0392 486.8929,-308.3224 509.5263,-298.6017"/>
+<polygon fill="#191970" stroke="#191970" points="431.505,-328.3016 423.6979,-335.4639 434.2675,-334.7335 431.505,-328.3016"/>
 </g>
-<!-- Node62&#45;&gt;Node64 -->
-<g id="edge10" class="edge">
-<title>Node62&#45;&gt;Node64</title>
-<path fill="none" stroke="#191970" d="M495.0076,-196.0761C510.8339,-185.9774 529.6071,-173.9983 544.1513,-164.7177"/>
-<polygon fill="#191970" stroke="#191970" points="493.1113,-193.1342 486.564,-201.4639 496.8768,-199.0352 493.1113,-193.1342"/>
+<!-- Node64&#45;&gt;Node58 -->
+<g id="edge17" class="edge">
+<title>Node64&#45;&gt;Node58</title>
+<path fill="none" stroke="#191970" d="M406.0236,-327.1997C411.7006,-318.6788 417.5942,-308.7351 421.6884,-299 431.1471,-276.5096 436.328,-248.4976 438.8018,-231.5813"/>
+<polygon fill="#191970" stroke="#191970" points="403.0929,-325.2841 400.2705,-335.4961 408.8452,-329.273 403.0929,-325.2841"/>
 </g>
-<!-- Node62&#45;&gt;Node65 -->
-<g id="edge12" class="edge">
-<title>Node62&#45;&gt;Node65</title>
-<path fill="none" stroke="#191970" d="M431.1701,-195.7951C415.7366,-185.7558 397.5295,-173.9124 383.3944,-164.7177"/>
-<polygon fill="#191970" stroke="#191970" points="429.5937,-198.945 439.8848,-201.4639 433.4107,-193.0772 429.5937,-198.945"/>
+<!-- Node64&#45;&gt;Node59 -->
+<g id="edge16" class="edge">
+<title>Node64&#45;&gt;Node59</title>
+<path fill="none" stroke="#191970" d="M319.7732,-332.5322C290.1015,-323.2721 260.2377,-311.3471 251.6884,-299 243.8452,-287.6726 245.7846,-280.4488 251.6884,-268 292.1431,-182.6968 393.1585,-123.6182 445.7277,-97.5482"/>
+<polygon fill="#191970" stroke="#191970" points="318.7875,-335.8906 329.3729,-335.4422 320.8183,-329.1916 318.7875,-335.8906"/>
 </g>
-<!-- Node63&#45;&gt;Node59 -->
-<g id="edge9" class="edge">
-<title>Node63&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M514.9403,-65.787C458.8325,-54.8907 386.2715,-40.7991 332.2419,-30.3064"/>
-<polygon fill="#191970" stroke="#191970" points="514.3773,-69.2429 524.8612,-67.7136 515.7119,-62.3713 514.3773,-69.2429"/>
+<!-- Node65 -->
+<g id="node10" class="node">
+<title>Node65</title>
+<g id="a_node10"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
+<polygon fill="#ffffff" stroke="#000000" points="260.6884,-268.5 260.6884,-298.5 412.6884,-298.5 412.6884,-268.5 260.6884,-268.5"/>
+<text text-anchor="start" x="268.6884" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="336.6884" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/feature_extractor.h</text>
+</a>
 </g>
-<!-- Node65&#45;&gt;Node59 -->
-<g id="edge14" class="edge">
-<title>Node65&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M312.5332,-129.6042C298.5908,-121.6503 284.5396,-111.1885 275,-98 260.4422,-77.8738 256.8249,-48.3664 256.0476,-30.6351"/>
-<polygon fill="#191970" stroke="#191970" points="311.1627,-132.8404 321.6311,-134.4723 314.4653,-126.6684 311.1627,-132.8404"/>
 </g>
-<!-- Node65&#45;&gt;Node61 -->
-<g id="edge13" class="edge">
-<title>Node65&#45;&gt;Node61</title>
-<path fill="none" stroke="#191970" d="M360,-124.0249C360,-115.128 360,-105.4287 360,-97.6432"/>
-<polygon fill="#191970" stroke="#191970" points="356.5001,-124.2966 360,-134.2967 363.5001,-124.2967 356.5001,-124.2966"/>
+<!-- Node64&#45;&gt;Node65 -->
+<g id="edge15" class="edge">
+<title>Node64&#45;&gt;Node65</title>
+<path fill="none" stroke="#191970" d="M370.7219,-327.3509C363.3467,-317.8482 355.0151,-307.1132 348.4413,-298.6432"/>
+<polygon fill="#191970" stroke="#191970" points="367.9926,-329.5427 376.8888,-335.2967 373.5225,-325.2508 367.9926,-329.5427"/>
+</g>
+<!-- Node66&#45;&gt;Node57 -->
+<g id="edge19" class="edge">
+<title>Node66&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M626.5118,-330.3561C608.7756,-320.1995 587.6192,-308.0843 571.2627,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="625.0142,-333.5317 635.4313,-335.4639 628.4928,-327.4572 625.0142,-333.5317"/>
+</g>
+<!-- Node66&#45;&gt;Node58 -->
+<g id="edge21" class="edge">
+<title>Node66&#45;&gt;Node58</title>
+<path fill="none" stroke="#191970" d="M658.0919,-325.3637C654.1691,-306.9874 646.098,-282.7862 629.6884,-268 612.7292,-252.7186 561.5804,-239.3017 516.7649,-229.9964"/>
+<polygon fill="#191970" stroke="#191970" points="654.6876,-326.2017 659.954,-335.395 661.5701,-324.9242 654.6876,-326.2017"/>
+</g>
+<!-- Node66&#45;&gt;Node59 -->
+<g id="edge20" class="edge">
+<title>Node66&#45;&gt;Node59</title>
+<path fill="none" stroke="#191970" d="M664.3028,-324.8312C664.8242,-307.5721 663.0071,-284.9641 652.6884,-268 609.7049,-197.3339 544.7537,-233.0158 497.6884,-165 483.5542,-144.5741 479.8012,-115.1627 478.8828,-97.5313"/>
+<polygon fill="#191970" stroke="#191970" points="660.7811,-325.0993 663.6882,-335.2874 667.769,-325.51 660.7811,-325.0993"/>
+</g>
+<!-- Node66&#45;&gt;Node60 -->
+<g id="edge22" class="edge">
+<title>Node66&#45;&gt;Node60</title>
+<path fill="none" stroke="#191970" d="M692.1544,-328.9659C721.8909,-305.1754 762.6884,-263.8987 762.6884,-216.5 762.6884,-216.5 762.6884,-216.5 762.6884,-149.5 762.6884,-85.9975 689.7145,-49.0364 636.6943,-30.5625"/>
+<polygon fill="#191970" stroke="#191970" points="689.8654,-326.3117 684.1036,-335.2029 694.1524,-331.8454 689.8654,-326.3117"/>
+</g>
+<!-- Node66&#45;&gt;Node61 -->
+<g id="edge23" class="edge">
+<title>Node66&#45;&gt;Node61</title>
+<path fill="none" stroke="#191970" d="M684.0219,-327.6413C690.7196,-319.3061 697.2064,-309.3658 700.6884,-299 705.0757,-285.9394 705.9475,-280.7346 700.6884,-268 681.6825,-221.9782 635.1837,-184.4361 606.5579,-164.6667"/>
+<polygon fill="#191970" stroke="#191970" points="681.2196,-325.5321 677.3573,-335.3979 686.529,-330.094 681.2196,-325.5321"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/arg__info_8h__incl.svg b/docs/reference/api/doxygen/arg__info_8h__incl.svg
index c391217f0..7a374ef38 100644
--- a/docs/reference/api/doxygen/arg__info_8h__incl.svg
+++ b/docs/reference/api/doxygen/arg__info_8h__incl.svg
@@ -4,1583 +4,1580 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/meta_schedule/arg_info.h Pages: 1 -->
-<svg width="3536pt" height="1035pt"
- viewBox="0.00 0.00 3536.00 1035.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1031)">
+<svg width="3536pt" height="979pt"
+ viewBox="0.00 0.00 3536.00 979.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 975)">
 <title>include/tvm/meta_schedule/arg_info.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1031 3532,-1031 3532,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-975 3532,-975 3532,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1778,-996.5 1778,-1026.5 1930,-1026.5 1930,-996.5 1778,-996.5"/>
-<text text-anchor="start" x="1786" y="-1014.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1854" y="-1003.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="1977,-940.5 1977,-970.5 2129,-970.5 2129,-940.5 1977,-940.5"/>
+<text text-anchor="start" x="1985" y="-958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2053" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
 <title>Node1</title>
 <g id="a_node2"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2283.5,-548.5 2283.5,-567.5 2382.5,-567.5 2382.5,-548.5 2283.5,-548.5"/>
-<text text-anchor="middle" x="2333" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1773.5,-548.5 1773.5,-567.5 1872.5,-567.5 1872.5,-548.5 1773.5,-548.5"/>
+<text text-anchor="middle" x="1823" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M1862.7124,-996.2679C1897.6238,-936.6026 2033.8567,-716.5457 2206,-604 2229.2888,-588.774 2258.2715,-577.7804 2282.6672,-570.3667"/>
-<polygon fill="#191970" stroke="#191970" points="2283.8457,-573.6688 2292.4627,-567.5046 2281.8824,-566.9498 2283.8457,-573.6688"/>
+<path fill="none" stroke="#191970" d="M2065.7073,-940.2644C2082.2296,-918.8562 2109,-877.9978 2109,-838 2109,-838 2109,-838 2109,-726 2109,-668.96 1937.7309,-599.5951 1860.2657,-571.1386"/>
+<polygon fill="#191970" stroke="#191970" points="1861.0169,-567.6875 1850.4229,-567.5579 1858.6238,-574.2657 1861.0169,-567.6875"/>
+</g>
+<!-- Node2 -->
+<g id="node3" class="node">
+<title>Node2</title>
+<g id="a_node3"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="1100.5,-492.5 1100.5,-511.5 1221.5,-511.5 1221.5,-492.5 1100.5,-492.5"/>
+<text text-anchor="middle" x="1161" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
+</a>
+</g>
+</g>
+<!-- Node0&#45;&gt;Node2 -->
+<g id="edge132" class="edge">
+<title>Node0&#45;&gt;Node2</title>
+<path fill="none" stroke="#191970" d="M1976.836,-955.1192C1709.5589,-953.0019 825.9473,-939.1126 742,-848 686.9551,-788.2568 580.8281,-949.9152 876,-716 972.5823,-639.4613 1091.3846,-552.4888 1139.3759,-517.643"/>
+<polygon fill="#191970" stroke="#191970" points="1141.7069,-520.2761 1147.7476,-511.572 1137.5974,-514.6093 1141.7069,-520.2761"/>
+</g>
+<!-- Node6 -->
+<g id="node7" class="node">
+<title>Node6</title>
+<g id="a_node7"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
+<polygon fill="#ffffff" stroke="#000000" points="1366.5,-123.5 1366.5,-142.5 1485.5,-142.5 1485.5,-123.5 1366.5,-123.5"/>
+<text text-anchor="middle" x="1426" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
+</a>
+</g>
+</g>
+<!-- Node0&#45;&gt;Node6 -->
+<g id="edge135" class="edge">
+<title>Node0&#45;&gt;Node6</title>
+<path fill="none" stroke="#191970" d="M1976.8835,-955.2022C1605.336,-953.2597 0,-939.0218 0,-838 0,-838 0,-838 0,-726 0,-665.0505 892.9419,-197.55 951,-179 1024.5658,-155.4951 1242.894,-141.8292 1356.1258,-136.1375"/>
+<polygon fill="#191970" stroke="#191970" points="1356.6012,-139.6184 1366.4157,-135.6277 1356.2547,-132.6269 1356.6012,-139.6184"/>
+</g>
+<!-- Node24 -->
+<g id="node25" class="node">
+<title>Node24</title>
+<g id="a_node25"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
+<polygon fill="#ffffff" stroke="#000000" points="1760,-297 1760,-316 1898,-316 1898,-297 1760,-297"/>
+<text text-anchor="middle" x="1829" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
+</a>
+</g>
+</g>
+<!-- Node0&#45;&gt;Node24 -->
+<g id="edge134" class="edge">
+<title>Node0&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M2129.0344,-952.7407C2335.1176,-944.2564 2889,-914.3514 2889,-838 2889,-838 2889,-838 2889,-782 2889,-617.9818 2391.4708,-494.7022 2243,-425 2203.2542,-406.3406 2189.5578,-410.0047 2151,-389 2130.311,-377.7294 2129.707,-367.158 2108,-358 2022.7695,-322.0418 1993.8379,-339.4846 1903,-322 1896.7406,-320.7952 1890.193,-319.4897 1883.692,-318.1651"/>
+<polygon fill="#191970" stroke="#191970" points="1884.0627,-314.6681 1873.5623,-316.0795 1882.651,-321.5243 1884.0627,-314.6681"/>
 </g>
 <!-- Node28 -->
 <g id="node29" class="node">
 <title>Node28</title>
 <g id="a_node29"><a xlink:href="shape__tuple_8h.html" target="_top" xlink:title="Runtime ShapeTuple container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="894,-291.5 894,-321.5 1020,-321.5 1020,-291.5 894,-291.5"/>
-<text text-anchor="start" x="902" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="957" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/shape_tuple.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2660,-291.5 2660,-321.5 2786,-321.5 2786,-291.5 2660,-291.5"/>
+<text text-anchor="start" x="2668" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="2723" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/shape_tuple.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node28 -->
-<g id="edge130" class="edge">
+<g id="edge133" class="edge">
 <title>Node0&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1777.7847,-1009.0843C1487.0537,-999.3751 464,-960.486 464,-894 464,-894 464,-894 464,-782 464,-709.8566 629.3876,-472.5076 678,-425 734.9544,-369.3399 822.0632,-338.0769 883.7759,-321.8166"/>
-<polygon fill="#191970" stroke="#191970" points="885.0185,-325.1114 893.8369,-319.2388 883.2811,-318.3304 885.0185,-325.1114"/>
+<path fill="none" stroke="#191970" d="M2129.1964,-953.462C2431.2352,-944.8904 3528,-908.8849 3528,-838 3528,-838 3528,-838 3528,-670 3528,-331.875 2786,-840.125 2786,-502 2786,-502 2786,-502 2786,-440.5 2786,-398.6482 2760.5203,-355.8333 2741.9688,-330.2067"/>
+<polygon fill="#191970" stroke="#191970" points="2744.5406,-327.8015 2735.749,-321.8889 2738.9346,-331.9935 2744.5406,-327.8015"/>
 </g>
 <!-- Node45 -->
-<g id="node41" class="node">
+<g id="node43" class="node">
 <title>Node45</title>
-<g id="a_node41"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
-<polygon fill="#ffffff" stroke="#000000" points="1923.5,-940.5 1923.5,-959.5 2024.5,-959.5 2024.5,-940.5 1923.5,-940.5"/>
-<text text-anchor="middle" x="1974" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/function.h</text>
+<g id="a_node43"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
+<polygon fill="#ffffff" stroke="#000000" points="1912.5,-884.5 1912.5,-903.5 2013.5,-903.5 2013.5,-884.5 1912.5,-884.5"/>
+<text text-anchor="middle" x="1963" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node45 -->
-<g id="edge131" class="edge">
+<g id="edge136" class="edge">
 <title>Node0&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1883.3553,-996.4554C1902.5873,-986.599 1927.5612,-973.7999 1946.4241,-964.1327"/>
-<polygon fill="#191970" stroke="#191970" points="1948.1013,-967.206 1955.4043,-959.5303 1944.9086,-960.9765 1948.1013,-967.206"/>
-</g>
-<!-- Node2 -->
-<g id="node3" class="node">
-<title>Node2</title>
-<g id="a_node3"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2245.5,-492.5 2245.5,-511.5 2366.5,-511.5 2366.5,-492.5 2245.5,-492.5"/>
-<text text-anchor="middle" x="2306" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
-</a>
-</g>
+<path fill="none" stroke="#191970" d="M2030.7528,-940.2977C2017.0033,-930.9022 1999.4079,-918.8787 1985.5943,-909.4395"/>
+<polygon fill="#191970" stroke="#191970" points="1987.3725,-906.4155 1977.1414,-903.6633 1983.4232,-912.195 1987.3725,-906.4155"/>
 </g>
 <!-- Node1&#45;&gt;Node2 -->
 <g id="edge2" class="edge">
 <title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M2328.297,-548.2455C2324.6576,-540.6973 2319.5076,-530.0158 2315.0738,-520.8197"/>
-<polygon fill="#191970" stroke="#191970" points="2318.1449,-519.1303 2310.6491,-511.6427 2311.8395,-522.1704 2318.1449,-519.1303"/>
+<path fill="none" stroke="#191970" d="M1773.4082,-553.9229C1677.1256,-545.9898 1458.1549,-527.8634 1274,-512 1260.342,-510.8235 1245.7573,-509.5475 1231.7595,-508.313"/>
+<polygon fill="#191970" stroke="#191970" points="1231.9616,-504.8173 1221.6923,-507.4235 1231.3454,-511.7902 1231.9616,-504.8173"/>
 </g>
 <!-- Node3 -->
 <g id="node4" class="node">
 <title>Node3</title>
 <g id="a_node4"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="#ffffff" stroke="#000000" points="2360.5,-358.5 2360.5,-388.5 2473.5,-388.5 2473.5,-358.5 2360.5,-358.5"/>
-<text text-anchor="start" x="2368.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="2417" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1813.5,-358.5 1813.5,-388.5 1926.5,-388.5 1926.5,-358.5 1813.5,-358.5"/>
+<text text-anchor="start" x="1821.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="1870" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node3 -->
-<g id="edge121" class="edge">
+<g id="edge123" class="edge">
 <title>Node1&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M2365.3394,-548.3452C2383.2678,-541.2519 2404.2515,-529.7099 2416,-512 2438.4937,-478.0927 2431.6872,-428.496 2424.4877,-398.767"/>
-<polygon fill="#191970" stroke="#191970" points="2427.7911,-397.5655 2421.871,-388.779 2421.0196,-399.3395 2427.7911,-397.5655"/>
-</g>
-<!-- Node6 -->
-<g id="node7" class="node">
-<title>Node6</title>
-<g id="a_node7"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="#ffffff" stroke="#000000" points="1890.5,-123.5 1890.5,-142.5 2009.5,-142.5 2009.5,-123.5 1890.5,-123.5"/>
-<text text-anchor="middle" x="1950" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
-</a>
-</g>
+<path fill="none" stroke="#191970" d="M1802.8439,-548.4543C1789.1185,-540.7394 1772.0365,-528.4561 1764,-512 1760.0993,-504.0127 1760.828,-500.3037 1764,-492 1779.3762,-451.7476 1815.415,-416.5229 1841.1994,-395.1617"/>
+<polygon fill="#191970" stroke="#191970" points="1843.5899,-397.73 1849.1715,-388.7246 1839.1923,-392.2838 1843.5899,-397.73"/>
 </g>
 <!-- Node1&#45;&gt;Node6 -->
-<g id="edge125" class="edge">
+<g id="edge127" class="edge">
 <title>Node1&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2360.5266,-548.4446C2442.5369,-518.3074 2675.8013,-419.5854 2614,-291 2562.2333,-183.2928 2179.86,-147.2271 2019.969,-136.7728"/>
-<polygon fill="#191970" stroke="#191970" points="2019.9678,-133.2657 2009.7649,-136.1208 2019.5213,-140.2514 2019.9678,-133.2657"/>
+<path fill="none" stroke="#191970" d="M1773.0901,-550.7364C1647.0117,-531.2603 1321.3018,-473.5552 1259,-389 1250.8272,-377.908 1250.4699,-368.8196 1259,-358 1301.447,-304.1604 1364.1817,-373.8822 1409,-322 1450.7514,-273.6681 1439.0169,-190.1517 1430.8608,-152.4566"/>
+<polygon fill="#191970" stroke="#191970" points="1434.27,-151.6642 1428.6049,-142.7111 1427.4503,-153.2428 1434.27,-151.6642"/>
 </g>
 <!-- Node7 -->
 <g id="node8" class="node">
 <title>Node7</title>
 <g id="a_node8"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
-<polygon fill="#ffffff" stroke="#000000" points="1105.5,-56.5 1105.5,-86.5 1234.5,-86.5 1234.5,-56.5 1105.5,-56.5"/>
-<text text-anchor="start" x="1113.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
-<text text-anchor="middle" x="1170" y="-63.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1901.5,-56.5 1901.5,-86.5 2030.5,-86.5 2030.5,-56.5 1901.5,-56.5"/>
+<text text-anchor="start" x="1909.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
+<text text-anchor="middle" x="1966" y="-63.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node7 -->
-<g id="edge123" class="edge">
+<g id="edge125" class="edge">
 <title>Node1&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2283.3543,-550.8669C2191.0775,-537.7912 1986.8075,-509.7039 1814,-492 1795.6654,-490.1216 507.4881,-397.2359 491,-389 455.5041,-371.2697 454.0694,-354.9737 432,-322 408.9978,-287.6326 373.4742,-267.537 399,-235 528.0781,-70.468 650.8259,-163.4418 856,-123 958.7235,-102.7522 988.1299,-105.8555 1095.5274,-87.0123"/>
-<polygon fill="#191970" stroke="#191970" points="1096.2358,-90.4413 1105.4688,-85.245 1095.0105,-83.5494 1096.2358,-90.4413"/>
+<path fill="none" stroke="#191970" d="M1872.5271,-555.2716C2068.5699,-543.7998 2789.2116,-495.0817 2988,-389 3058.7935,-351.2217 3080.1902,-329.8927 3109,-255 3146.5139,-157.4804 3036.8443,-200.9368 2934,-179 2605.9185,-109.0199 2204.978,-82.9424 2041.1486,-74.7498"/>
+<polygon fill="#191970" stroke="#191970" points="2040.8637,-71.2316 2030.7037,-74.2357 2040.5195,-78.2231 2040.8637,-71.2316"/>
 </g>
 <!-- Node12 -->
 <g id="node13" class="node">
 <title>Node12</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="3169,-62 3169,-81 3213,-81 3213,-62 3169,-62"/>
-<text text-anchor="middle" x="3191" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="358,-62 358,-81 402,-81 402,-62 358,-62"/>
+<text text-anchor="middle" x="380" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
 </g>
 <!-- Node1&#45;&gt;Node12 -->
-<g id="edge126" class="edge">
+<g id="edge128" class="edge">
 <title>Node1&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2382.6982,-556.3887C2455.3265,-553.0331 2594.1953,-542.8115 2708,-512 2768.7243,-495.5595 2786.1392,-491.6115 2838,-456 2998.3753,-345.8743 3022.0036,-294.5413 3144,-143 3157.7546,-125.9143 3171.2782,-104.7516 3180.2615,-89.899"/>
-<polygon fill="#191970" stroke="#191970" points="3183.3582,-91.5393 3185.4659,-81.1562 3177.3433,-87.9587 3183.3582,-91.5393"/>
+<path fill="none" stroke="#191970" d="M1773.3365,-556.6333C1618.0614,-552.1429 1143.0752,-536.7016 991,-512 786.3456,-478.7579 701.6761,-520.0406 541,-389 440.6306,-307.1429 396.6987,-146.5424 384.0631,-90.9711"/>
+<polygon fill="#191970" stroke="#191970" points="387.4469,-90.0612 381.889,-81.0412 380.6089,-91.5584 387.4469,-90.0612"/>
 </g>
 <!-- Node13 -->
 <g id="node14" class="node">
 <title>Node13</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2600.5,-62 2600.5,-81 2669.5,-81 2669.5,-62 2600.5,-62"/>
-<text text-anchor="middle" x="2635" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="853.5,-62 853.5,-81 922.5,-81 922.5,-62 853.5,-62"/>
+<text text-anchor="middle" x="888" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
 </g>
 <!-- Node1&#45;&gt;Node13 -->
-<g id="edge127" class="edge">
+<g id="edge129" class="edge">
 <title>Node1&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2382.7127,-554.8273C2488.4992,-546.8415 2729.642,-521.6466 2771,-456 2800.7872,-408.7196 3000.2251,-657.7686 2768,-179 2746.5368,-134.7502 2697.1637,-102.7559 2664.9861,-85.684"/>
-<polygon fill="#191970" stroke="#191970" points="2666.4972,-82.5254 2656.0032,-81.0671 2663.2974,-88.7513 2666.4972,-82.5254"/>
+<path fill="none" stroke="#191970" d="M1773.2369,-556.87C1611.4842,-552.9501 1107.0672,-538.696 1039,-512 936.2242,-471.6912 910.2351,-427.8868 879,-322 853.9683,-237.1426 860.7295,-210.4714 874,-123 875.6358,-112.2177 878.727,-100.4634 881.5894,-90.9103"/>
+<polygon fill="#191970" stroke="#191970" points="884.9727,-91.8184 884.6341,-81.229 878.2952,-89.7182 884.9727,-91.8184"/>
 </g>
 <!-- Node14 -->
 <g id="node15" class="node">
 <title>Node14</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="767.5,-62 767.5,-81 812.5,-81 812.5,-62 767.5,-62"/>
-<text text-anchor="middle" x="790" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3009.5,-62 3009.5,-81 3054.5,-81 3054.5,-62 3009.5,-62"/>
+<text text-anchor="middle" x="3032" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
 </g>
 <!-- Node1&#45;&gt;Node14 -->
-<g id="edge128" class="edge">
+<g id="edge130" class="edge">
 <title>Node1&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2283.4635,-556.2186C2101.4743,-549.2926 1452.8095,-521.2413 922,-456 842.2979,-446.2039 823.6735,-435.026 744,-425 642.0477,-412.1705 379.1108,-425.3536 283,-389 202.0749,-358.3903 152,-331.5207 152,-245 152,-245 152,-245 152,-189 152,-122.2134 222.7845,-141.3525 287,-123 376.0824,-97.5406 660.2454,-79.0521 757.2116,-73.3484"/>
-<polygon fill="#191970" stroke="#191970" points="757.5274,-76.836 767.3072,-72.7611 757.1208,-69.8478 757.5274,-76.836"/>
+<path fill="none" stroke="#191970" d="M1872.552,-557.0511C2021.457,-553.3507 2477.324,-535.8106 2845,-456 3084.5955,-403.9915 3376,-551.6752 3376,-306.5 3376,-306.5 3376,-306.5 3376,-189 3376,-123.5702 3150.32,-87.0469 3064.5467,-75.5285"/>
+<polygon fill="#191970" stroke="#191970" points="3064.9416,-72.0503 3054.5708,-74.2174 3064.0295,-78.9907 3064.9416,-72.0503"/>
 </g>
 <!-- Node16 -->
 <g id="node17" class="node">
 <title>Node16</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1812.5,-235.5 1812.5,-254.5 1859.5,-254.5 1859.5,-235.5 1812.5,-235.5"/>
-<text text-anchor="middle" x="1836" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1210.5,-235.5 1210.5,-254.5 1257.5,-254.5 1257.5,-235.5 1210.5,-235.5"/>
+<text text-anchor="middle" x="1234" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
 </g>
 <!-- Node1&#45;&gt;Node16 -->
-<g id="edge129" class="edge">
+<g id="edge131" class="edge">
 <title>Node1&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2283.3874,-549.853C2198.9291,-534.9185 2032.7649,-500.5563 1996,-456 1948.5623,-398.509 2014.3173,-347.7538 1966,-291 1953.3068,-276.0905 1903.757,-261.4532 1869.6342,-252.8277"/>
-<polygon fill="#191970" stroke="#191970" points="1870.1449,-249.3488 1859.5975,-250.3502 1868.4673,-256.1448 1870.1449,-249.3488"/>
+<path fill="none" stroke="#191970" d="M1773.2398,-556.5629C1617.4501,-551.835 1145.5982,-535.7003 1081,-512 1041.9455,-497.6714 1036.0843,-484.7316 1006,-456 949.8917,-402.4144 898.6989,-350.0708 949,-291 965.1491,-272.0354 1128.0775,-254.7856 1200.0863,-248.0234"/>
+<polygon fill="#191970" stroke="#191970" points="1200.7903,-251.4732 1210.425,-247.0661 1200.1448,-244.5031 1200.7903,-251.4732"/>
 </g>
 <!-- Node21 -->
 <g id="node22" class="node">
 <title>Node21</title>
 <g id="a_node22"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
-<polygon fill="#ffffff" stroke="#000000" points="1405.5,-179.5 1405.5,-198.5 1534.5,-198.5 1534.5,-179.5 1405.5,-179.5"/>
-<text text-anchor="middle" x="1470" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1770.5,-179.5 1770.5,-198.5 1899.5,-198.5 1899.5,-179.5 1770.5,-179.5"/>
+<text text-anchor="middle" x="1835" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node21 -->
-<g id="edge124" class="edge">
+<g id="edge126" class="edge">
 <title>Node1&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2292.4129,-548.4769C2224.5484,-531.6293 2092.9043,-494.6809 2067,-456 2026.068,-394.8794 2117.7209,-354.153 2080,-291 2053.0802,-245.9304 2026.3283,-249.9335 1976,-235 1794.0333,-181.0067 1736.8999,-218.7707 1544.9904,-199.1434"/>
-<polygon fill="#191970" stroke="#191970" points="1545.1704,-195.6429 1534.8566,-198.0667 1544.4309,-202.6037 1545.1704,-195.6429"/>
+<path fill="none" stroke="#191970" d="M1842.4848,-548.4557C1860.8316,-539.3714 1889.0026,-525.1711 1913,-512 1979.8884,-475.288 1989.1273,-450.6152 2061,-425 2110.3516,-407.4113 2259.8987,-428.7748 2294,-389 2406.6615,-257.5948 2066.3434,-209.6069 1909.5754,-194.7705"/>
+<polygon fill="#191970" stroke="#191970" points="1909.8014,-191.2766 1899.5215,-193.8407 1909.1567,-198.2468 1909.8014,-191.2766"/>
 </g>
 <!-- Node25 -->
 <g id="node26" class="node">
 <title>Node25</title>
 <g id="a_node26"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
-<polygon fill="#ffffff" stroke="#000000" points="2076.5,-425.5 2076.5,-455.5 2189.5,-455.5 2189.5,-425.5 2076.5,-425.5"/>
-<text text-anchor="start" x="2084.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="2133" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1053.5,-425.5 1053.5,-455.5 1166.5,-455.5 1166.5,-425.5 1053.5,-425.5"/>
+<text text-anchor="start" x="1061.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="1110" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node25 -->
-<g id="edge122" class="edge">
+<g id="edge124" class="edge">
 <title>Node1&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M2310.1196,-548.4209C2290.1164,-539.7499 2260.517,-526.1946 2236,-512 2209.626,-496.7303 2181.2942,-476.7782 2161.0336,-461.8318"/>
-<polygon fill="#191970" stroke="#191970" points="2162.9861,-458.922 2152.8738,-455.7609 2158.8076,-464.5382 2162.9861,-458.922"/>
+<path fill="none" stroke="#191970" d="M1773.4375,-556.7248C1612.7735,-552.353 1115.3395,-536.8543 1092,-512 1080.0994,-499.327 1085.8919,-479.9769 1093.9538,-464.6012"/>
+<polygon fill="#191970" stroke="#191970" points="1097.1606,-466.0468 1099.1289,-455.6364 1091.0982,-462.5471 1097.1606,-466.0468"/>
 </g>
 <!-- Node43 -->
-<g id="node39" class="node">
+<g id="node41" class="node">
 <title>Node43</title>
-<g id="a_node39"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2514.5,-492.5 2514.5,-511.5 2645.5,-511.5 2645.5,-492.5 2514.5,-492.5"/>
-<text text-anchor="middle" x="2580" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/repr_printer.h</text>
+<g id="a_node41"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="1772.5,-492.5 1772.5,-511.5 1903.5,-511.5 1903.5,-492.5 1772.5,-492.5"/>
+<text text-anchor="middle" x="1838" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/repr_printer.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node43 -->
-<g id="edge118" class="edge">
+<g id="edge120" class="edge">
 <title>Node1&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M2375.1802,-548.4369C2417.4523,-538.8529 2482.6062,-524.0812 2528.0417,-513.78"/>
-<polygon fill="#191970" stroke="#191970" points="2529.0152,-517.1482 2537.9938,-511.5237 2527.4674,-510.3215 2529.0152,-517.1482"/>
+<path fill="none" stroke="#191970" d="M1825.6128,-548.2455C1827.5916,-540.8579 1830.3744,-530.4689 1832.8012,-521.4087"/>
+<polygon fill="#191970" stroke="#191970" points="1836.2105,-522.2078 1835.4171,-511.6427 1829.4489,-520.3966 1836.2105,-522.2078"/>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge3" class="edge">
 <title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M2314.2879,-492.4976C2322.3874,-483.2045 2335.0601,-468.6461 2346,-456 2363.2658,-436.0413 2382.8235,-413.3064 2397.034,-396.7641"/>
-<polygon fill="#191970" stroke="#191970" points="2400.0776,-398.5923 2403.9374,-388.7255 2394.767,-394.0317 2400.0776,-398.5923"/>
+<path fill="none" stroke="#191970" d="M1221.5464,-493.6207C1226.4324,-493.0379 1231.2963,-492.4888 1236,-492 1449.6682,-469.795 1514.3579,-524.389 1718,-456 1742.0743,-447.9151 1743.9186,-437.544 1766,-425 1786.0232,-413.6252 1808.8979,-402.1416 1828.0865,-392.9138"/>
+<polygon fill="#191970" stroke="#191970" points="1829.6628,-396.0397 1837.1816,-388.5753 1826.649,-389.7217 1829.6628,-396.0397"/>
 </g>
 <!-- Node2&#45;&gt;Node6 -->
-<g id="edge83" class="edge">
+<g id="edge85" class="edge">
 <title>Node2&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2326.9857,-492.3445C2388.5874,-463.5268 2566.4913,-376.7448 2597,-322 2603.707,-309.9649 2605.3972,-301.9231 2597,-291 2526.6045,-199.4285 2172.0364,-154.5554 2019.7105,-139.2636"/>
-<polygon fill="#191970" stroke="#191970" points="2019.833,-135.7587 2009.5367,-138.2561 2019.1432,-142.7246 2019.833,-135.7587"/>
+<path fill="none" stroke="#191970" d="M1100.3818,-492.7017C1070.1747,-485.9928 1034.1814,-474.7049 1006,-456 973.9011,-434.6949 964.5211,-425.0753 951,-389 918.0477,-301.081 903.8034,-241.3542 974,-179 1001.9345,-154.1864 1236.1311,-140.9046 1355.9583,-135.6775"/>
+<polygon fill="#191970" stroke="#191970" points="1356.2955,-139.1663 1366.1363,-135.2409 1355.9954,-132.1728 1356.2955,-139.1663"/>
 </g>
 <!-- Node2&#45;&gt;Node7 -->
-<g id="edge79" class="edge">
+<g id="edge81" class="edge">
 <title>Node2&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2305.7988,-492.4886C2305.5916,-477.9191 2305.5826,-449.2177 2308,-425 2310.9842,-395.1033 2337.4978,-318.7582 2326,-291 2294.1607,-214.133 2251.3127,-214.3592 2176,-179 2108.5605,-147.3372 2091.0079,-137.8478 2018,-123 1871.1666,-93.1381 1423.2374,-78.2366 1244.6498,-73.3587"/>
-<polygon fill="#191970" stroke="#191970" points="1244.6489,-69.8575 1234.5579,-73.0859 1244.4596,-76.855 1244.6489,-69.8575"/>
+<path fill="none" stroke="#191970" d="M1221.7077,-498.1448C1334.5105,-490.8577 1584.0536,-474.1562 1794,-456 1931.5095,-444.1081 1965.3261,-434.8064 2103,-425 2197.5117,-418.268 2870.2176,-431.3052 2955,-389 2989.4726,-371.7987 2997.0187,-358.273 3010,-322 3037.1095,-246.2494 2972.7307,-211.2004 2899,-179 2797.928,-134.8588 2513.5338,-155.8973 2404,-143 2275.7429,-127.8981 2127.672,-101.9209 2040.715,-85.7833"/>
+<polygon fill="#191970" stroke="#191970" points="2041.1503,-82.3043 2030.6785,-83.914 2039.8686,-89.186 2041.1503,-82.3043"/>
 </g>
 <!-- Node2&#45;&gt;Node12 -->
-<g id="edge115" class="edge">
+<g id="edge117" class="edge">
 <title>Node2&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2352.9119,-492.4389C2393.0415,-483.9289 2452.1976,-470.5963 2503,-456 2679.5568,-405.2725 2732.6868,-409.8843 2894,-322 2936.2407,-298.987 2941.2781,-284.8485 2979,-255 3041.7598,-205.3397 3060.3678,-196.3755 3120,-143 3139.7966,-125.2805 3161.0932,-103.4205 3175.1424,-88.5694"/>
-<polygon fill="#191970" stroke="#191970" points="3177.8524,-90.7963 3182.1463,-81.1106 3172.7495,-86.0046 3177.8524,-90.7963"/>
+<path fill="none" stroke="#191970" d="M1100.2949,-496.328C1053.0194,-490.3829 986.4956,-478.6153 932,-456 697.1769,-358.5499 462.5692,-148.8034 397.6098,-88.2084"/>
+<polygon fill="#191970" stroke="#191970" points="399.9061,-85.5635 390.2191,-81.2725 395.1158,-90.6678 399.9061,-85.5635"/>
 </g>
 <!-- Node2&#45;&gt;Node13 -->
-<g id="edge116" class="edge">
+<g id="edge118" class="edge">
 <title>Node2&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2347.2867,-492.4139C2439.4683,-468.3 2654,-395.9635 2654,-245 2654,-245 2654,-245 2654,-189 2654,-154.1936 2645.7747,-114.3226 2640.1479,-91.1197"/>
-<polygon fill="#191970" stroke="#191970" points="2643.477,-90.0065 2637.6419,-81.1633 2636.6887,-91.7152 2643.477,-90.0065"/>
+<path fill="none" stroke="#191970" d="M1100.3292,-493.8297C1050.6551,-486.0617 985.3783,-473.0399 965,-456 852.9145,-362.2766 874.6436,-155.4668 884.5985,-91.0492"/>
+<polygon fill="#191970" stroke="#191970" points="888.0753,-91.4747 886.2327,-81.0413 881.1668,-90.3465 888.0753,-91.4747"/>
 </g>
 <!-- Node2&#45;&gt;Node16 -->
-<g id="edge117" class="edge">
+<g id="edge119" class="edge">
 <title>Node2&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2292.744,-492.4528C2272.4194,-477.8366 2232.3053,-449.0749 2198,-425 2112.2239,-364.8037 2098.9935,-335.2422 2004,-291 1959.7795,-270.4048 1904.582,-257.5 1869.7995,-250.7713"/>
-<polygon fill="#191970" stroke="#191970" points="1870.1684,-247.2796 1859.6951,-248.8797 1868.8802,-254.1601 1870.1684,-247.2796"/>
+<path fill="none" stroke="#191970" d="M1117.811,-492.4531C1094.1872,-485.4945 1065.6212,-474.0509 1045,-456 984.388,-402.9428 918.1587,-352.6553 970,-291 999.0711,-256.4254 1135.9444,-247.8356 1200.3451,-245.7032"/>
+<polygon fill="#191970" stroke="#191970" points="1200.5044,-249.2001 1210.3957,-245.4038 1200.2959,-242.2033 1200.5044,-249.2001"/>
 </g>
 <!-- Node2&#45;&gt;Node21 -->
-<g id="edge81" class="edge">
+<g id="edge83" class="edge">
 <title>Node2&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2299.9751,-492.269C2272.3074,-447.9298 2156.2936,-265.8595 2098,-235 1991.5425,-178.6434 1682.2239,-209.7164 1545.2018,-199.0491"/>
-<polygon fill="#191970" stroke="#191970" points="1545.121,-195.5282 1534.8501,-198.1281 1544.5006,-202.5006 1545.121,-195.5282"/>
-</g>
-<!-- Node24 -->
-<g id="node25" class="node">
-<title>Node24</title>
-<g id="a_node25"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
-<polygon fill="#ffffff" stroke="#000000" points="2450,-297 2450,-316 2588,-316 2588,-297 2450,-297"/>
-<text text-anchor="middle" x="2519" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
-</a>
-</g>
+<path fill="none" stroke="#191970" d="M1221.5587,-493.7335C1226.442,-493.1254 1231.3019,-492.5397 1236,-492 1318.7299,-482.4955 1537.5094,-498.7043 1609,-456 1710.4857,-395.3784 1671.0528,-316.1628 1757,-235 1770.7251,-222.039 1788.7355,-211.1162 1803.9915,-203.1712"/>
+<polygon fill="#191970" stroke="#191970" points="1805.7017,-206.2291 1813.0716,-198.6175 1802.5636,-199.9718 1805.7017,-206.2291"/>
 </g>
 <!-- Node2&#45;&gt;Node24 -->
-<g id="edge80" class="edge">
+<g id="edge82" class="edge">
 <title>Node2&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M2306.0897,-492.3949C2306.8619,-466.787 2312.5581,-396.2337 2351,-358 2366.7192,-342.366 2418.7446,-327.8835 2460.7824,-318.2847"/>
-<polygon fill="#191970" stroke="#191970" points="2461.6812,-321.6702 2470.6763,-316.0722 2460.1535,-314.8389 2461.6812,-321.6702"/>
+<path fill="none" stroke="#191970" d="M1221.5499,-493.6538C1226.4352,-493.0636 1231.2979,-492.5038 1236,-492 1334.4272,-481.455 1590.1864,-497.6248 1680,-456 1696.6139,-448.3002 1778.1196,-361.4836 1813.0547,-323.7919"/>
+<polygon fill="#191970" stroke="#191970" points="1815.9129,-325.8562 1820.1338,-316.1384 1810.7741,-321.103 1815.9129,-325.8562"/>
 </g>
 <!-- Node2&#45;&gt;Node25 -->
 <g id="edge42" class="edge">
 <title>Node2&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M2278.9688,-492.3906C2253.9294,-483.4893 2216.0043,-470.0073 2185.0871,-459.0165"/>
-<polygon fill="#191970" stroke="#191970" points="2185.9153,-455.5964 2175.3206,-455.5446 2183.5705,-462.192 2185.9153,-455.5964"/>
+<path fill="none" stroke="#191970" d="M1153.0313,-492.3906C1146.5764,-484.6068 1137.2168,-473.3202 1128.8739,-463.2597"/>
+<polygon fill="#191970" stroke="#191970" points="1131.5536,-461.008 1122.476,-455.5446 1126.1652,-465.4764 1131.5536,-461.008"/>
 </g>
 <!-- Node26 -->
 <g id="node27" class="node">
 <title>Node26</title>
 <g id="a_node27"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
-<polygon fill="#ffffff" stroke="#000000" points="1380.5,-364 1380.5,-383 1505.5,-383 1505.5,-364 1380.5,-364"/>
-<text text-anchor="middle" x="1443" y="-371" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2159.5,-364 2159.5,-383 2284.5,-383 2284.5,-364 2159.5,-364"/>
+<text text-anchor="middle" x="2222" y="-371" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node26 -->
-<g id="edge82" class="edge">
+<g id="edge84" class="edge">
 <title>Node2&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2245.3155,-492.9641C2090.6915,-469.9407 1682.3804,-409.1435 1515.562,-384.3044"/>
-<polygon fill="#191970" stroke="#191970" points="1516.0515,-380.8388 1505.645,-382.8278 1515.0205,-387.7625 1516.0515,-380.8388"/>
+<path fill="none" stroke="#191970" d="M1221.8291,-501.0741C1352.9642,-498.4675 1658.3176,-488.9464 1756,-456 1780.0638,-447.8838 1780.1268,-433.6607 1804,-425 1835.6607,-413.5142 2038.2698,-391.8847 2149.3577,-380.6624"/>
+<polygon fill="#191970" stroke="#191970" points="2149.8871,-384.1268 2159.4861,-379.6424 2149.1857,-377.1621 2149.8871,-384.1268"/>
 </g>
 <!-- Node37 -->
-<g id="node34" class="node">
+<g id="node36" class="node">
 <title>Node37</title>
-<g id="a_node34"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#000000" points="1385,-425.5 1385,-455.5 1501,-455.5 1501,-425.5 1385,-425.5"/>
-<text text-anchor="start" x="1393" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
-<text text-anchor="middle" x="1443" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
+<g id="a_node36"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
+<polygon fill="#ffffff" stroke="#000000" points="1484,-425.5 1484,-455.5 1600,-455.5 1600,-425.5 1484,-425.5"/>
+<text text-anchor="start" x="1492" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
+<text text-anchor="middle" x="1542" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node37 -->
-<g id="edge84" class="edge">
+<g id="edge86" class="edge">
 <title>Node2&#45;&gt;Node37</title>
-<path fill="none" stroke="#191970" d="M2245.2871,-498.1019C2127.4153,-490.4766 1859.3564,-472.8565 1634,-456 1593.2129,-452.9492 1547.5506,-449.2538 1511.1023,-446.2366"/>
-<polygon fill="#191970" stroke="#191970" points="1511.3334,-442.7439 1501.0781,-445.4047 1510.7544,-449.7199 1511.3334,-442.7439"/>
+<path fill="none" stroke="#191970" d="M1220.1132,-492.4581C1288.7845,-481.3734 1401.8674,-463.1198 1474.0878,-451.4622"/>
+<polygon fill="#191970" stroke="#191970" points="1474.6839,-454.9114 1483.9983,-449.8625 1473.5684,-448.0008 1474.6839,-454.9114"/>
 </g>
 <!-- Node4 -->
 <g id="node5" class="node">
 <title>Node4</title>
 <g id="a_node5"><a xlink:href="functor_8h.html" target="_top" xlink:title="Defines the Functor data structures. ">
-<polygon fill="#ffffff" stroke="#000000" points="2208.5,-297 2208.5,-316 2317.5,-316 2317.5,-297 2208.5,-297"/>
-<text text-anchor="middle" x="2263" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/functor.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1290.5,-297 1290.5,-316 1399.5,-316 1399.5,-297 1290.5,-297"/>
+<text text-anchor="middle" x="1345" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/functor.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node4 -->
 <g id="edge4" class="edge">
 <title>Node3&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M2382.4394,-358.4639C2356.0182,-346.969 2319.9021,-331.2561 2294.3209,-320.1266"/>
-<polygon fill="#191970" stroke="#191970" points="2295.6408,-316.884 2285.0747,-316.1039 2292.8481,-323.3028 2295.6408,-316.884"/>
+<path fill="none" stroke="#191970" d="M1813.317,-366.2662C1715.0396,-353.7241 1513.6163,-328.0187 1409.8961,-314.782"/>
+<polygon fill="#191970" stroke="#191970" points="1410.2296,-311.2962 1399.8669,-313.5021 1409.3434,-318.2399 1410.2296,-311.2962"/>
 </g>
 <!-- Node3&#45;&gt;Node12 -->
 <g id="edge41" class="edge">
 <title>Node3&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2473.5825,-360.617C2509.5693,-351.6207 2556.7919,-338.3166 2597,-322 2770.3796,-251.6419 2792.8596,-188.8529 2968,-123 3033.6259,-98.3246 3114.7283,-83.1981 3158.7947,-76.18"/>
-<polygon fill="#191970" stroke="#191970" points="3159.5659,-79.6022 3168.9101,-74.6085 3158.4913,-72.6852 3159.5659,-79.6022"/>
+<path fill="none" stroke="#191970" d="M1813.2355,-359.5733C1706.5561,-333.5987 1468.7147,-276.6277 1267,-235 1044.8541,-189.1559 988.0626,-184.1529 765,-143 634.2249,-118.8732 478.5175,-89.8716 412.333,-77.5314"/>
+<polygon fill="#191970" stroke="#191970" points="412.6038,-74.0217 402.1316,-75.6291 411.3205,-80.903 412.6038,-74.0217"/>
 </g>
 <!-- Node17 -->
 <g id="node18" class="node">
 <title>Node17</title>
 <g id="a_node18"><a xlink:href="array_8h.html" target="_top" xlink:title="Runtime Array container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1038,-291.5 1038,-321.5 1164,-321.5 1164,-291.5 1038,-291.5"/>
-<text text-anchor="start" x="1046" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1101" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2516,-291.5 2516,-321.5 2642,-321.5 2642,-291.5 2516,-291.5"/>
+<text text-anchor="start" x="2524" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="2579" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node17 -->
 <g id="edge19" class="edge">
 <title>Node3&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2360.4084,-372.2143C2191.3929,-368.1144 1675.5921,-353.7142 1249,-322 1224.6531,-320.19 1198.1139,-317.6042 1174.3185,-315.0619"/>
-<polygon fill="#191970" stroke="#191970" points="1174.6216,-311.5744 1164.3029,-313.9775 1173.868,-318.5337 1174.6216,-311.5744"/>
+<path fill="none" stroke="#191970" d="M1926.5737,-364.2774C1941.9311,-361.9901 1958.5775,-359.7076 1974,-358 2193.2412,-333.7248 2249.6639,-345.4022 2469,-322 2480.9107,-320.7292 2493.5168,-319.1626 2505.7798,-317.5187"/>
+<polygon fill="#191970" stroke="#191970" points="2506.5436,-320.9469 2515.9772,-316.1243 2505.5952,-314.0114 2506.5436,-320.9469"/>
 </g>
 <!-- Node3&#45;&gt;Node24 -->
 <g id="edge36" class="edge">
 <title>Node3&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M2439.8908,-358.4639C2456.5425,-347.526 2479.0083,-332.769 2495.7442,-321.7759"/>
-<polygon fill="#191970" stroke="#191970" points="2497.9425,-324.5195 2504.3791,-316.1039 2494.0994,-318.6688 2497.9425,-324.5195"/>
+<path fill="none" stroke="#191970" d="M1860.6965,-358.2967C1854.6186,-348.3645 1846.6466,-335.3371 1840.2181,-324.8321"/>
+<polygon fill="#191970" stroke="#191970" points="1843.0522,-322.7578 1834.8471,-316.055 1837.0814,-326.4115 1843.0522,-322.7578"/>
 </g>
 <!-- Node5 -->
 <g id="node6" class="node">
 <title>Node5</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1552.5,-179.5 1552.5,-198.5 1641.5,-198.5 1641.5,-179.5 1552.5,-179.5"/>
-<text text-anchor="middle" x="1597" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2275.5,-179.5 2275.5,-198.5 2364.5,-198.5 2364.5,-179.5 2275.5,-179.5"/>
+<text text-anchor="middle" x="2320" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
 </g>
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2254.8272,-296.9578C2240.3948,-280.8693 2208.6287,-248.7579 2174,-235 2149.9695,-225.4528 1791.5134,-201.5171 1651.9898,-192.5053"/>
-<polygon fill="#191970" stroke="#191970" points="1652.1395,-189.0078 1641.935,-191.8572 1651.6892,-195.9933 1652.1395,-189.0078"/>
+<path fill="none" stroke="#191970" d="M1391.7505,-296.9669C1467.9404,-281.7515 1623.7719,-251.968 1757,-235 1942.5667,-211.3662 2164.172,-197.4132 2265.0841,-191.8357"/>
+<polygon fill="#191970" stroke="#191970" points="2265.5434,-195.3159 2275.3373,-191.275 2265.161,-188.3264 2265.5434,-195.3159"/>
 </g>
 <!-- Node4&#45;&gt;Node6 -->
 <g id="edge6" class="edge">
 <title>Node4&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2264.0905,-296.8866C2265.2421,-281.8275 2265.1578,-252.5831 2250,-235 2217.7299,-197.5664 2082.9668,-162.4314 2005.4849,-144.8097"/>
-<polygon fill="#191970" stroke="#191970" points="2005.9338,-141.3233 1995.4094,-142.5435 2004.3976,-148.1527 2005.9338,-141.3233"/>
+<path fill="none" stroke="#191970" d="M1349.4703,-296.9248C1362.3846,-269.2625 1399.9206,-188.8614 1417.1576,-151.9403"/>
+<polygon fill="#191970" stroke="#191970" points="1420.3847,-153.3013 1421.4437,-142.7595 1414.0419,-150.3401 1420.3847,-153.3013"/>
 </g>
 <!-- Node4&#45;&gt;Node13 -->
 <g id="edge16" class="edge">
 <title>Node4&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2278.0436,-296.9967C2335.4122,-260.7557 2540.7528,-131.0379 2611.1314,-86.5783"/>
-<polygon fill="#191970" stroke="#191970" points="2613.1842,-89.4214 2619.7692,-81.1216 2609.4456,-83.5034 2613.1842,-89.4214"/>
+<path fill="none" stroke="#191970" d="M1336.2505,-296.586C1322.6484,-281.6438 1295.0644,-253.1716 1267,-235 1153.3933,-161.4402 1001.1295,-107.4279 929.0831,-84.1583"/>
+<polygon fill="#191970" stroke="#191970" points="930.0527,-80.7938 919.4618,-81.08 927.9196,-87.4609 930.0527,-80.7938"/>
 </g>
 <!-- Node4&#45;&gt;Node14 -->
 <g id="edge17" class="edge">
 <title>Node4&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2258.5059,-296.7848C2250.5039,-280.7388 2232.1372,-249.0754 2206,-235 2127.8111,-192.8939 1895.567,-213.7798 1808,-199 1670.2478,-175.7498 1641.0304,-144.5372 1503,-123 1260.9611,-85.2341 1197.1824,-106.597 953,-87 907.7445,-83.368 855.5125,-78.2348 822.624,-74.8872"/>
-<polygon fill="#191970" stroke="#191970" points="822.9634,-71.4038 812.6589,-73.8673 822.2505,-78.3674 822.9634,-71.4038"/>
+<path fill="none" stroke="#191970" d="M1399.6244,-304.3757C1566.9151,-297.6699 2065.1106,-276.1417 2135,-255 2199.796,-235.3991 2202.979,-201.0008 2267,-179 2337.3908,-154.8102 2860.9341,-91.7394 2998.8057,-75.4074"/>
+<polygon fill="#191970" stroke="#191970" points="2999.622,-78.8354 3009.1417,-74.1851 2998.7999,-71.8838 2999.622,-78.8354"/>
 </g>
 <!-- Node4&#45;&gt;Node16 -->
 <g id="edge18" class="edge">
 <title>Node4&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2208.2931,-298.5635C2191.5153,-296.1326 2172.9987,-293.453 2156,-291 2051.669,-275.9445 1928.1892,-258.2214 1869.8102,-249.8479"/>
-<polygon fill="#191970" stroke="#191970" points="1870.0821,-246.3512 1859.6864,-248.3959 1869.0882,-253.2803 1870.0821,-246.3512"/>
+<path fill="none" stroke="#191970" d="M1327.6563,-296.8906C1309.5727,-286.8713 1281.0141,-271.0483 1260.1835,-259.5071"/>
+<polygon fill="#191970" stroke="#191970" points="1261.6654,-256.3268 1251.222,-254.5419 1258.2729,-262.4498 1261.6654,-256.3268"/>
 </g>
 <!-- Node6&#45;&gt;Node7 -->
 <g id="edge7" class="edge">
 <title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1890.4118,-128.3017C1750.1169,-117.24 1399.216,-89.5728 1244.8602,-77.4024"/>
-<polygon fill="#191970" stroke="#191970" points="1244.7807,-73.8854 1234.5365,-76.5885 1244.2304,-80.8637 1244.7807,-73.8854"/>
+<path fill="none" stroke="#191970" d="M1485.5932,-126.213C1584.5735,-114.9402 1782.6716,-92.3791 1891.0021,-80.0414"/>
+<polygon fill="#191970" stroke="#191970" points="1891.6594,-83.4893 1901.1991,-78.8801 1890.8672,-76.5342 1891.6594,-83.4893"/>
 </g>
 <!-- Node11 -->
 <g id="node12" class="node">
 <title>Node11</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="962.5,-62 962.5,-81 1087.5,-81 1087.5,-62 962.5,-62"/>
-<text text-anchor="middle" x="1025" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2431.5,-62 2431.5,-81 2556.5,-81 2556.5,-62 2431.5,-62"/>
+<text text-anchor="middle" x="2494" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
 </g>
 <!-- Node6&#45;&gt;Node11 -->
 <g id="edge11" class="edge">
 <title>Node6&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M1890.1603,-128.7977C1862.6961,-126.9238 1829.7019,-124.7494 1800,-123 1487.2443,-104.5793 1406.5302,-128.5504 1096,-87 1088.5222,-85.9994 1080.664,-84.6206 1073.0111,-83.0948"/>
-<polygon fill="#191970" stroke="#191970" points="1073.623,-79.6471 1063.118,-81.0238 1072.1887,-86.4986 1073.623,-79.6471"/>
+<path fill="none" stroke="#191970" d="M1485.7088,-129.5617C1668.7832,-119.0195 2221.8671,-87.1706 2421.3092,-75.6858"/>
+<polygon fill="#191970" stroke="#191970" points="2421.6056,-79.1747 2431.3878,-75.1055 2421.2031,-72.1863 2421.6056,-79.1747"/>
 </g>
 <!-- Node6&#45;&gt;Node12 -->
 <g id="edge12" class="edge">
 <title>Node6&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2009.856,-130.0337C2230.1575,-119.1163 2991.7331,-81.375 3158.5441,-73.1084"/>
-<polygon fill="#191970" stroke="#191970" points="3158.845,-76.5979 3168.6595,-72.6071 3158.4985,-69.6065 3158.845,-76.5979"/>
+<path fill="none" stroke="#191970" d="M1366.3861,-129.495C1172.005,-118.0663 560.5766,-82.1171 412.5221,-73.4121"/>
+<polygon fill="#191970" stroke="#191970" points="412.4795,-69.9037 402.2912,-72.8106 412.0685,-76.8916 412.4795,-69.9037"/>
 </g>
 <!-- Node6&#45;&gt;Node13 -->
 <g id="edge13" class="edge">
 <title>Node6&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2009.7429,-124.6646C2014.5629,-124.072 2019.3608,-123.5087 2024,-123 2235.5572,-99.8045 2489.325,-81.4289 2590.3469,-74.4917"/>
-<polygon fill="#191970" stroke="#191970" points="2590.7516,-77.9723 2600.4897,-73.7984 2590.2743,-70.9886 2590.7516,-77.9723"/>
+<path fill="none" stroke="#191970" d="M1366.369,-126.1834C1257.6152,-113.7515 1029.3761,-87.661 932.744,-76.6148"/>
+<polygon fill="#191970" stroke="#191970" points="933.0502,-73.1271 922.7174,-75.4686 932.2551,-80.0818 933.0502,-73.1271"/>
 </g>
 <!-- Node6&#45;&gt;Node14 -->
 <g id="edge14" class="edge">
 <title>Node6&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1890.315,-129.6929C1853.3612,-127.6781 1804.9296,-125.0969 1762,-123 1402.5172,-105.4412 1312.039,-112.0434 953,-87 907.7091,-83.8409 855.4858,-78.5905 822.6089,-75.0879"/>
-<polygon fill="#191970" stroke="#191970" points="822.9646,-71.6061 812.6477,-74.017 822.2162,-78.566 822.9646,-71.6061"/>
+<path fill="none" stroke="#191970" d="M1485.5436,-130.7198C1748.2281,-120.6606 2799.2722,-80.4121 2999.0998,-72.7599"/>
+<polygon fill="#191970" stroke="#191970" points="2999.3961,-76.2512 3009.2549,-72.371 2999.1282,-69.2563 2999.3961,-76.2512"/>
 </g>
 <!-- Node15 -->
 <g id="node16" class="node">
 <title>Node15</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1862,-62 1862,-81 1912,-81 1912,-62 1862,-62"/>
-<text text-anchor="middle" x="1887" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1521,-62 1521,-81 1571,-81 1571,-62 1521,-62"/>
+<text text-anchor="middle" x="1546" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
 </g>
 <!-- Node6&#45;&gt;Node15 -->
 <g id="edge15" class="edge">
 <title>Node6&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1940.1563,-123.3906C1930.5961,-114.0581 1915.8777,-99.6902 1904.3505,-88.4374"/>
-<polygon fill="#191970" stroke="#191970" points="1906.5832,-85.7257 1896.9825,-81.2449 1901.6934,-90.7348 1906.5832,-85.7257"/>
+<path fill="none" stroke="#191970" d="M1444.75,-123.3906C1464.476,-113.2811 1495.7312,-97.2628 1518.3,-85.6962"/>
+<polygon fill="#191970" stroke="#191970" points="1520.0787,-88.7176 1527.3817,-81.0419 1516.886,-82.4881 1520.0787,-88.7176"/>
 </g>
 <!-- Node8 -->
 <g id="node9" class="node">
 <title>Node8</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1030.5,-.5 1030.5,-19.5 1123.5,-19.5 1123.5,-.5 1030.5,-.5"/>
-<text text-anchor="middle" x="1077" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dlpack/dlpack.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1826.5,-.5 1826.5,-19.5 1919.5,-19.5 1919.5,-.5 1826.5,-.5"/>
+<text text-anchor="middle" x="1873" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dlpack/dlpack.h</text>
 </g>
 <!-- Node7&#45;&gt;Node8 -->
 <g id="edge8" class="edge">
 <title>Node7&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1147.0112,-56.2977C1132.8034,-46.9022 1114.6215,-34.8787 1100.3475,-25.4395"/>
-<polygon fill="#191970" stroke="#191970" points="1101.8845,-22.2598 1091.6128,-19.6633 1098.0233,-28.0986 1101.8845,-22.2598"/>
+<path fill="none" stroke="#191970" d="M1943.0112,-56.2977C1928.8034,-46.9022 1910.6215,-34.8787 1896.3475,-25.4395"/>
+<polygon fill="#191970" stroke="#191970" points="1897.8845,-22.2598 1887.6128,-19.6633 1894.0233,-28.0986 1897.8845,-22.2598"/>
 </g>
 <!-- Node9 -->
 <g id="node10" class="node">
 <title>Node9</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1142,-.5 1142,-19.5 1198,-19.5 1198,-.5 1142,-.5"/>
-<text text-anchor="middle" x="1170" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stddef.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1938,-.5 1938,-19.5 1994,-19.5 1994,-.5 1938,-.5"/>
+<text text-anchor="middle" x="1966" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stddef.h</text>
 </g>
 <!-- Node7&#45;&gt;Node9 -->
 <g id="edge9" class="edge">
 <title>Node7&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1170,-56.2977C1170,-48.3834 1170,-38.6043 1170,-30.0759"/>
-<polygon fill="#191970" stroke="#191970" points="1173.5001,-29.8469 1170,-19.8469 1166.5001,-29.847 1173.5001,-29.8469"/>
+<path fill="none" stroke="#191970" d="M1966,-56.2977C1966,-48.3834 1966,-38.6043 1966,-30.0759"/>
+<polygon fill="#191970" stroke="#191970" points="1969.5001,-29.8469 1966,-19.8469 1962.5001,-29.847 1969.5001,-29.8469"/>
 </g>
 <!-- Node10 -->
 <g id="node11" class="node">
 <title>Node10</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1216.5,-.5 1216.5,-19.5 1269.5,-19.5 1269.5,-.5 1216.5,-.5"/>
-<text text-anchor="middle" x="1243" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stdint.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2012.5,-.5 2012.5,-19.5 2065.5,-19.5 2065.5,-.5 2012.5,-.5"/>
+<text text-anchor="middle" x="2039" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stdint.h</text>
 </g>
 <!-- Node7&#45;&gt;Node10 -->
 <g id="edge10" class="edge">
 <title>Node7&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M1188.0449,-56.2977C1198.8114,-47.2274 1212.4851,-35.7077 1223.4995,-26.4285"/>
-<polygon fill="#191970" stroke="#191970" points="1225.9191,-28.9667 1231.3118,-19.8469 1221.4089,-23.6132 1225.9191,-28.9667"/>
+<path fill="none" stroke="#191970" d="M1984.0449,-56.2977C1994.8114,-47.2274 2008.4851,-35.7077 2019.4995,-26.4285"/>
+<polygon fill="#191970" stroke="#191970" points="2021.9191,-28.9667 2027.3118,-19.8469 2017.4089,-23.6132 2021.9191,-28.9667"/>
 </g>
 <!-- Node17&#45;&gt;Node14 -->
 <g id="edge22" class="edge">
 <title>Node17&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1037.6641,-293.1067C1008.9678,-285.081 975.5937,-272.8732 949,-255 880.5929,-209.0248 823.9277,-126.2332 800.9737,-89.6843"/>
-<polygon fill="#191970" stroke="#191970" points="803.8758,-87.7224 795.6434,-81.0532 797.92,-91.4006 803.8758,-87.7224"/>
+<path fill="none" stroke="#191970" d="M2639.9751,-291.4613C2689.129,-278.9879 2751.7153,-262.178 2762,-255 2796.191,-231.1369 2785.2176,-205.988 2817,-179 2873.4864,-131.0346 2955.0536,-97.8888 2999.7306,-82.0813"/>
+<polygon fill="#191970" stroke="#191970" points="3000.9072,-85.3779 3009.2075,-78.7934 2998.6127,-78.7646 3000.9072,-85.3779"/>
 </g>
 <!-- Node17&#45;&gt;Node16 -->
 <g id="edge23" class="edge">
 <title>Node17&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1164.3424,-299.4985C1190.5321,-296.6993 1221.2144,-293.5433 1249,-291 1459.3134,-271.7497 1712.4838,-253.6106 1802.3577,-247.327"/>
-<polygon fill="#191970" stroke="#191970" points="1802.7623,-250.8074 1812.4946,-246.6202 1802.2754,-243.8244 1802.7623,-250.8074"/>
+<path fill="none" stroke="#191970" d="M2515.7251,-296.4508C2500.461,-294.3405 2484.1761,-292.3483 2469,-291 2226.0323,-269.4145 1440.4188,-249.8849 1267.7597,-245.7872"/>
+<polygon fill="#191970" stroke="#191970" points="1267.5881,-242.2823 1257.5082,-245.545 1267.4227,-249.2803 1267.5881,-242.2823"/>
 </g>
 <!-- Node18 -->
 <g id="node19" class="node">
 <title>Node18</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1323,-179.5 1323,-198.5 1387,-198.5 1387,-179.5 1323,-179.5"/>
-<text text-anchor="middle" x="1355" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2826,-179.5 2826,-198.5 2890,-198.5 2890,-179.5 2826,-179.5"/>
+<text text-anchor="middle" x="2858" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
 </g>
 <!-- Node17&#45;&gt;Node18 -->
 <g id="edge20" class="edge">
 <title>Node17&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1131.9059,-291.4533C1162.1498,-276.8036 1209.5801,-254.0273 1251,-235 1275.3156,-223.83 1303.0355,-211.5937 1323.7392,-202.5512"/>
-<polygon fill="#191970" stroke="#191970" points="1325.2377,-205.7161 1333.0065,-198.5122 1322.441,-199.2991 1325.2377,-205.7161"/>
+<path fill="none" stroke="#191970" d="M2642.1997,-292.8128C2645.1734,-292.1945 2648.1184,-291.5875 2651,-291 2733.9313,-274.0928 2775.1213,-311.6545 2838,-255 2850.9004,-243.3766 2855.603,-223.7402 2857.2559,-208.9005"/>
+<polygon fill="#191970" stroke="#191970" points="2860.7632,-208.9308 2858.0107,-198.6997 2853.7822,-208.4142 2860.7632,-208.9308"/>
 </g>
 <!-- Node19 -->
 <g id="node20" class="node">
 <title>Node19</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="704,-235.5 704,-254.5 762,-254.5 762,-235.5 704,-235.5"/>
-<text text-anchor="middle" x="733" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1766,-235.5 1766,-254.5 1824,-254.5 1824,-235.5 1766,-235.5"/>
+<text text-anchor="middle" x="1795" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
 </g>
 <!-- Node17&#45;&gt;Node19 -->
 <g id="edge21" class="edge">
 <title>Node17&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M1037.8254,-292.6836C1034.845,-292.0999 1031.8917,-291.5355 1029,-291 937.2932,-274.0176 828.8114,-258.2542 772.0632,-250.336"/>
-<polygon fill="#191970" stroke="#191970" points="772.4712,-246.8592 762.0848,-248.9503 771.5083,-253.7926 772.4712,-246.8592"/>
+<path fill="none" stroke="#191970" d="M2515.9823,-296.8272C2500.6357,-294.691 2484.2493,-292.5927 2469,-291 2228.5001,-265.8815 1938.225,-251.3863 1834.3329,-246.695"/>
+<polygon fill="#191970" stroke="#191970" points="1834.4443,-243.1966 1824.2979,-246.2463 1834.1315,-250.1896 1834.4443,-243.1966"/>
 </g>
 <!-- Node20 -->
 <g id="node21" class="node">
 <title>Node20</title>
 <g id="a_node21"><a xlink:href="runtime_2container_2base_8h.html" target="_top" xlink:title="Base utilities for common POD(plain old data) container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1105.5,-235.5 1105.5,-254.5 1160.5,-254.5 1160.5,-235.5 1105.5,-235.5"/>
-<text text-anchor="middle" x="1133" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2626.5,-235.5 2626.5,-254.5 2681.5,-254.5 2681.5,-235.5 2626.5,-235.5"/>
+<text text-anchor="middle" x="2654" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
 </a>
 </g>
 </g>
 <!-- Node17&#45;&gt;Node20 -->
 <g id="edge24" class="edge">
 <title>Node17&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1108.9101,-291.2977C1113.2132,-283.0277 1118.5758,-272.7215 1123.1482,-263.9339"/>
-<polygon fill="#191970" stroke="#191970" points="1126.3654,-265.3335 1127.8764,-254.8469 1120.1557,-262.1024 1126.3654,-265.3335"/>
+<path fill="none" stroke="#191970" d="M2597.5393,-291.2977C2608.6008,-282.2274 2622.6491,-270.7077 2633.9653,-261.4285"/>
+<polygon fill="#191970" stroke="#191970" points="2636.4781,-263.8942 2641.9916,-254.8469 2632.0396,-258.4813 2636.4781,-263.8942"/>
 </g>
 <!-- Node20&#45;&gt;Node5 -->
 <g id="edge25" class="edge">
 <title>Node20&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1160.9283,-242.5601C1225.8564,-236.7401 1392.1855,-220.9819 1541.9559,-199.0617"/>
-<polygon fill="#191970" stroke="#191970" points="1542.8708,-202.4646 1552.2521,-197.541 1541.848,-195.5397 1542.8708,-202.4646"/>
+<path fill="none" stroke="#191970" d="M2626.149,-240.6163C2577.4502,-232.9098 2473.4137,-216.2633 2374.9039,-199.1308"/>
+<polygon fill="#191970" stroke="#191970" points="2375.2292,-195.6347 2364.7765,-197.3642 2374.0263,-202.5306 2375.2292,-195.6347"/>
 </g>
 <!-- Node20&#45;&gt;Node6 -->
 <g id="edge32" class="edge">
 <title>Node20&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1140.9825,-235.1626C1154.0046,-219.9416 1181.4509,-191.1704 1212,-179 1272.9919,-154.7016 1707.7699,-139.863 1879.9976,-134.8816"/>
-<polygon fill="#191970" stroke="#191970" points="1880.323,-138.3738 1890.2186,-134.5887 1880.1224,-131.3767 1880.323,-138.3738"/>
+<path fill="none" stroke="#191970" d="M2634.4553,-235.4977C2602.161,-220.3283 2535.2335,-191.1182 2475,-179 2379.6735,-159.8216 1714.57,-140.6583 1495.6687,-134.807"/>
+<polygon fill="#191970" stroke="#191970" points="1495.6759,-131.306 1485.5862,-134.5384 1495.4894,-138.3035 1495.6759,-131.306"/>
 </g>
 <!-- Node20&#45;&gt;Node11 -->
 <g id="edge26" class="edge">
 <title>Node20&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M1134.4571,-235.1097C1135.9783,-221.7523 1137.1402,-197.4912 1129,-179 1111.4985,-139.2438 1072.4643,-105.5442 1047.5101,-86.9607"/>
-<polygon fill="#191970" stroke="#191970" points="1049.4804,-84.0661 1039.3291,-81.0328 1045.3731,-89.7345 1049.4804,-84.0661"/>
+<path fill="none" stroke="#191970" d="M2645.1698,-235.4248C2619.3329,-207.4079 2543.6057,-125.2912 2510.1769,-89.0418"/>
+<polygon fill="#191970" stroke="#191970" points="2512.3524,-86.2381 2503.0001,-81.2595 2507.2065,-90.9836 2512.3524,-86.2381"/>
 </g>
 <!-- Node20&#45;&gt;Node14 -->
 <g id="edge35" class="edge">
 <title>Node20&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1117.7356,-235.3926C1083.0505,-213.7743 995.3141,-160.2831 918,-123 886.0501,-107.5928 848.2341,-92.7782 822.0288,-83.0259"/>
-<polygon fill="#191970" stroke="#191970" points="823.1837,-79.7214 812.5903,-79.5454 820.7617,-86.2891 823.1837,-79.7214"/>
+<path fill="none" stroke="#191970" d="M2655.1936,-235.1649C2658.615,-211.5867 2670.7355,-151.6749 2708,-123 2753.2818,-88.1558 2926.5942,-76.2956 2999.4435,-72.7953"/>
+<polygon fill="#191970" stroke="#191970" points="2999.6062,-76.2916 3009.4356,-72.3377 2999.2859,-69.2989 2999.6062,-76.2916"/>
 </g>
 <!-- Node20&#45;&gt;Node18 -->
 <g id="edge33" class="edge">
 <title>Node20&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1160.6676,-238.0208C1198.8404,-228.3916 1267.9995,-210.9461 1312.687,-199.6736"/>
-<polygon fill="#191970" stroke="#191970" points="1313.8179,-202.998 1322.6581,-197.1583 1312.1057,-196.2106 1313.8179,-202.998"/>
+<path fill="none" stroke="#191970" d="M2681.5123,-237.4476C2716.102,-227.9524 2775.82,-211.5592 2816.1004,-200.5018"/>
+<polygon fill="#191970" stroke="#191970" points="2817.1786,-203.8354 2825.8953,-197.813 2815.3255,-197.0851 2817.1786,-203.8354"/>
 </g>
 <!-- Node20&#45;&gt;Node21 -->
 <g id="edge27" class="edge">
 <title>Node20&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1160.5781,-240.4173C1212.9758,-231.7103 1328.291,-212.5481 1402.395,-200.2341"/>
-<polygon fill="#191970" stroke="#191970" points="1403.2761,-203.6357 1412.567,-198.5438 1402.1285,-196.7304 1403.2761,-203.6357"/>
+<path fill="none" stroke="#191970" d="M2626.2725,-240.8264C2612.149,-238.8218 2594.6889,-236.529 2579,-235 2334.8389,-211.2047 2044.3308,-197.4271 1909.9874,-191.8785"/>
+<polygon fill="#191970" stroke="#191970" points="1909.9441,-188.3739 1899.8092,-191.4616 1909.6575,-195.3681 1909.9441,-188.3739"/>
 </g>
 <!-- Node23 -->
 <g id="node24" class="node">
 <title>Node23</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1221.5,-179.5 1221.5,-198.5 1304.5,-198.5 1304.5,-179.5 1221.5,-179.5"/>
-<text text-anchor="middle" x="1263" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2382.5,-179.5 2382.5,-198.5 2465.5,-198.5 2465.5,-179.5 2382.5,-179.5"/>
+<text text-anchor="middle" x="2424" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
 </g>
 <!-- Node20&#45;&gt;Node23 -->
 <g id="edge34" class="edge">
 <title>Node20&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1155.3478,-235.3733C1176.4033,-226.3032 1208.1495,-212.6279 1231.7175,-202.4755"/>
-<polygon fill="#191970" stroke="#191970" points="1233.1444,-205.6719 1240.9438,-198.5011 1230.375,-199.243 1233.1444,-205.6719"/>
+<path fill="none" stroke="#191970" d="M2626.2566,-238.2451C2588.4109,-229.0305 2519.9794,-212.3689 2473.2924,-201.0016"/>
+<polygon fill="#191970" stroke="#191970" points="2473.9314,-197.555 2463.3872,-198.5899 2472.2754,-204.3563 2473.9314,-197.555"/>
 </g>
 <!-- Node21&#45;&gt;Node6 -->
 <g id="edge28" class="edge">
 <title>Node21&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1534.8663,-180.0167C1537.6135,-179.6671 1540.3336,-179.3267 1543,-179 1690.7574,-160.8965 1728.2262,-160.9693 1876,-143 1877.303,-142.8416 1878.6187,-142.68 1879.944,-142.5158"/>
-<polygon fill="#191970" stroke="#191970" points="1880.7609,-145.9405 1890.2431,-141.2142 1879.8832,-138.9958 1880.7609,-145.9405"/>
+<path fill="none" stroke="#191970" d="M1770.1927,-180.1266C1695.2824,-169.87 1572.4687,-153.0544 1495.5882,-142.528"/>
+<polygon fill="#191970" stroke="#191970" points="1495.9015,-139.0383 1485.5191,-141.1493 1494.9519,-145.9736 1495.9015,-139.0383"/>
 </g>
 <!-- Node21&#45;&gt;Node13 -->
 <g id="edge30" class="edge">
 <title>Node21&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1520.0964,-179.4442C1597.2994,-165.0608 1750.1416,-137.9444 1881,-123 2149.2675,-92.363 2473.4663,-77.7028 2590.3206,-73.1306"/>
-<polygon fill="#191970" stroke="#191970" points="2590.4697,-76.6276 2600.3271,-72.744 2590.1994,-69.6328 2590.4697,-76.6276"/>
+<path fill="none" stroke="#191970" d="M1803.4617,-179.4628C1753.5664,-164.7847 1653.1368,-136.9018 1566,-123 1329.6496,-85.2926 1042.4014,-75.0097 933.1251,-72.3633"/>
+<polygon fill="#191970" stroke="#191970" points="932.8877,-68.857 922.809,-72.1236 932.725,-75.8551 932.8877,-68.857"/>
 </g>
 <!-- Node21&#45;&gt;Node14 -->
 <g id="edge31" class="edge">
 <title>Node21&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1414.9287,-179.484C1279.0686,-156.0082 931.094,-95.8802 822.725,-77.1547"/>
-<polygon fill="#191970" stroke="#191970" points="823.1628,-73.6785 812.7128,-75.4246 821.9708,-80.5763 823.1628,-73.6785"/>
+<path fill="none" stroke="#191970" d="M1899.7803,-182.641C2120.0775,-161.0162 2835.6465,-90.7745 2998.7142,-74.7674"/>
+<polygon fill="#191970" stroke="#191970" points="2999.4566,-78.2114 3009.0669,-73.7512 2998.7727,-71.2449 2999.4566,-78.2114"/>
 </g>
 <!-- Node22 -->
 <g id="node23" class="node">
 <title>Node22</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1445.5,-123.5 1445.5,-142.5 1494.5,-142.5 1494.5,-123.5 1445.5,-123.5"/>
-<text text-anchor="middle" x="1470" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstdlib</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1810.5,-123.5 1810.5,-142.5 1859.5,-142.5 1859.5,-123.5 1810.5,-123.5"/>
+<text text-anchor="middle" x="1835" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstdlib</text>
 </g>
 <!-- Node21&#45;&gt;Node22 -->
 <g id="edge29" class="edge">
 <title>Node21&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1470,-179.2455C1470,-171.9382 1470,-161.6944 1470,-152.7046"/>
-<polygon fill="#191970" stroke="#191970" points="1473.5001,-152.6426 1470,-142.6427 1466.5001,-152.6427 1473.5001,-152.6426"/>
+<path fill="none" stroke="#191970" d="M1835,-179.2455C1835,-171.9382 1835,-161.6944 1835,-152.7046"/>
+<polygon fill="#191970" stroke="#191970" points="1838.5001,-152.6426 1835,-142.6427 1831.5001,-152.6427 1838.5001,-152.6426"/>
 </g>
 <!-- Node24&#45;&gt;Node7 -->
 <g id="edge37" class="edge">
 <title>Node24&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2511.9092,-296.8697C2487.6713,-264.8441 2404.2168,-161.7578 2307,-123 2257.7194,-103.3531 1490.7321,-80.4207 1245.0743,-73.5466"/>
-<polygon fill="#191970" stroke="#191970" points="1244.9088,-70.0407 1234.8151,-73.2604 1244.7136,-77.038 1244.9088,-70.0407"/>
+<path fill="none" stroke="#191970" d="M1837.2655,-296.9288C1852.708,-278.7373 1886.416,-237.537 1909,-199 1929.022,-164.8347 1946.6965,-122.3691 1956.8696,-96.0993"/>
+<polygon fill="#191970" stroke="#191970" points="1960.2178,-97.142 1960.5123,-86.5513 1953.6776,-94.6468 1960.2178,-97.142"/>
 </g>
 <!-- Node24&#45;&gt;Node11 -->
 <g id="edge38" class="edge">
 <title>Node24&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2484.4234,-296.9173C2430.8549,-282.2844 2324.4389,-254.0644 2233,-235 1938.3041,-173.5577 1863.8187,-159.445 1565,-123 1357.4801,-97.6901 1302.8403,-117.3672 1096,-87 1088.7911,-85.9416 1081.2216,-84.572 1073.8209,-83.0854"/>
-<polygon fill="#191970" stroke="#191970" points="1074.3887,-79.6284 1063.8836,-81.0039 1072.9536,-86.4797 1074.3887,-79.6284"/>
+<path fill="none" stroke="#191970" d="M1854.5037,-296.9947C1891.7016,-283.1661 1963.0482,-256.7745 2024,-235 2184.528,-177.6527 2376.8067,-111.5787 2456.531,-84.2973"/>
+<polygon fill="#191970" stroke="#191970" points="2457.8024,-87.5616 2466.1314,-81.0134 2455.5368,-80.9383 2457.8024,-87.5616"/>
 </g>
 <!-- Node24&#45;&gt;Node12 -->
 <g id="edge39" class="edge">
 <title>Node24&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2534.3101,-296.9403C2583.9072,-266.4254 2745.4471,-170.5727 2892,-123 2986.1343,-92.443 3103.1544,-78.9985 3158.7581,-74.0076"/>
-<polygon fill="#191970" stroke="#191970" points="3159.06,-77.4946 3168.7211,-73.1456 3158.4566,-70.5207 3159.06,-77.4946"/>
+<path fill="none" stroke="#191970" d="M1777.8976,-296.98C1624.7066,-268.5942 1155.7332,-182.805 765,-123 634.6074,-103.0423 478.698,-83.5156 412.3969,-75.4137"/>
+<polygon fill="#191970" stroke="#191970" points="412.5268,-71.9037 402.1769,-74.1687 411.6803,-78.8523 412.5268,-71.9037"/>
 </g>
 <!-- Node24&#45;&gt;Node13 -->
 <g id="edge40" class="edge">
 <title>Node24&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2526.7941,-296.9771C2541.3052,-278.866 2572.7354,-237.7945 2592,-199 2610.101,-162.5487 2623.4799,-116.6607 2630.1807,-91.0621"/>
-<polygon fill="#191970" stroke="#191970" points="2633.6346,-91.6813 2632.7104,-81.1269 2626.851,-89.9541 2633.6346,-91.6813"/>
+<path fill="none" stroke="#191970" d="M1790.9462,-296.9967C1641.3929,-259.6481 1094.3044,-123.0213 933.0308,-82.7457"/>
+<polygon fill="#191970" stroke="#191970" points="933.5244,-79.2616 922.9743,-80.2343 931.8283,-86.053 933.5244,-79.2616"/>
 </g>
 <!-- Node25&#45;&gt;Node4 -->
 <g id="edge43" class="edge">
 <title>Node25&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M2147.6599,-425.389C2172.0921,-400.2051 2220.9736,-349.8195 2246.5471,-323.4592"/>
-<polygon fill="#191970" stroke="#191970" points="2249.2315,-325.7187 2253.6825,-316.1042 2244.2073,-320.8445 2249.2315,-325.7187"/>
+<path fill="none" stroke="#191970" d="M1120.4857,-425.21C1133.8833,-406.7684 1158.7487,-376.0079 1187,-358 1216.7462,-339.0392 1254.1197,-326.4425 1285.0493,-318.4558"/>
+<polygon fill="#191970" stroke="#191970" points="1285.9855,-321.8297 1294.8471,-316.0225 1284.2983,-315.0361 1285.9855,-321.8297"/>
 </g>
 <!-- Node25&#45;&gt;Node12 -->
-<g id="edge78" class="edge">
+<g id="edge80" class="edge">
 <title>Node25&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2189.6181,-435.2023C2338.5977,-420.6405 2738.2666,-377.3844 2858,-322 2891.1786,-306.6527 3090.647,-144.789 3120,-123 3136.5047,-110.7485 3155.344,-97.0986 3169.4672,-86.9313"/>
-<polygon fill="#191970" stroke="#191970" points="3171.5108,-89.7727 3177.5884,-81.0943 3167.4255,-84.0885 3171.5108,-89.7727"/>
+<path fill="none" stroke="#191970" d="M1071.9758,-425.3848C997.4737,-395.4036 827.5561,-325.2183 690,-255 646.0052,-232.542 636.5511,-224.0859 594,-199 525.3654,-158.5367 444.6868,-110.2805 404.8447,-86.4034"/>
+<polygon fill="#191970" stroke="#191970" points="406.4536,-83.2872 396.0771,-81.1475 402.8544,-89.2911 406.4536,-83.2872"/>
 </g>
 <!-- Node25&#45;&gt;Node24 -->
 <g id="edge44" class="edge">
 <title>Node25&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M2162.4922,-425.4814C2198.4865,-407.5922 2261.5425,-377.6636 2318,-358 2365.8256,-341.3428 2421.7408,-327.4493 2462.4372,-318.3075"/>
-<polygon fill="#191970" stroke="#191970" points="2463.4377,-321.6707 2472.4417,-316.0869 2461.9209,-314.837 2463.4377,-321.6707"/>
+<path fill="none" stroke="#191970" d="M1166.7735,-436.9175C1280.1247,-429.4013 1530.7353,-410.8607 1614,-389 1647.3036,-380.2563 1652.993,-370.6938 1685,-358 1720.5449,-343.9031 1761.6712,-329.368 1790.8493,-319.3493"/>
+<polygon fill="#191970" stroke="#191970" points="1792.3123,-322.5482 1800.643,-316.0021 1790.0484,-315.9243 1792.3123,-322.5482"/>
 </g>
 <!-- Node25&#45;&gt;Node26 -->
 <g id="edge45" class="edge">
 <title>Node25&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2076.2412,-434.9886C1951.5415,-422.8801 1653.8162,-393.9706 1515.6871,-380.558"/>
-<polygon fill="#191970" stroke="#191970" points="1515.9136,-377.0636 1505.6221,-379.5807 1515.237,-384.0309 1515.9136,-377.0636"/>
+<path fill="none" stroke="#191970" d="M1166.6696,-437.8934C1311.4862,-431.1141 1706.9189,-411.8584 2036,-389 2073.2049,-386.4157 2114.5518,-383.0222 2148.7963,-380.0694"/>
+<polygon fill="#191970" stroke="#191970" points="2149.5232,-383.5196 2159.1834,-379.1685 2148.9183,-376.5457 2149.5232,-383.5196"/>
 </g>
 <!-- Node36 -->
-<g id="node33" class="node">
+<g id="node35" class="node">
 <title>Node36</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1782.5,-297 1782.5,-316 1847.5,-316 1847.5,-297 1782.5,-297"/>
-<text text-anchor="middle" x="1815" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">functional</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="978.5,-297 978.5,-316 1043.5,-316 1043.5,-297 978.5,-297"/>
+<text text-anchor="middle" x="1011" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">functional</text>
 </g>
 <!-- Node25&#45;&gt;Node36 -->
-<g id="edge77" class="edge">
+<g id="edge79" class="edge">
 <title>Node25&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M2097.1395,-425.389C2034.6699,-399.0653 1906.8572,-345.2071 1847.1138,-320.0322"/>
-<polygon fill="#191970" stroke="#191970" points="1848.3664,-316.762 1837.792,-316.1042 1845.6481,-323.2127 1848.3664,-316.762"/>
+<path fill="none" stroke="#191970" d="M1098.8359,-425.389C1080.4705,-400.5307 1043.9643,-351.1184 1024.2939,-324.4937"/>
+<polygon fill="#191970" stroke="#191970" points="1026.8529,-322.0674 1018.0956,-316.1042 1021.2228,-326.227 1026.8529,-322.0674"/>
 </g>
 <!-- Node26&#45;&gt;Node6 -->
-<g id="edge69" class="edge">
+<g id="edge71" class="edge">
 <title>Node26&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1480.6452,-363.9982C1516.047,-354.6642 1570.2892,-339.3316 1616,-322 1649.0016,-309.4872 1729.0798,-272.0183 1760,-255 1823.0509,-220.2972 1893.3876,-172.6221 1928.1314,-148.4297"/>
-<polygon fill="#191970" stroke="#191970" points="1930.374,-151.1324 1936.5624,-142.5327 1926.3618,-145.3962 1930.374,-151.1324"/>
+<path fill="none" stroke="#191970" d="M2176.5568,-363.9555C2110.9853,-350.1174 1996.6865,-325.722 1988,-322 1965.339,-312.2901 1964.778,-300.4322 1942,-291 1864.6084,-258.9526 1838.7223,-273.3835 1757,-255 1658.9843,-232.9512 1630.8666,-237.3279 1538,-199 1504.8116,-185.3025 1469.6213,-163.1424 1447.7329,-148.3266"/>
+<polygon fill="#191970" stroke="#191970" points="1449.7083,-145.4373 1439.4833,-142.6623 1445.746,-151.208 1449.7083,-145.4373"/>
 </g>
 <!-- Node26&#45;&gt;Node7 -->
 <g id="edge46" class="edge">
 <title>Node26&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1400.9153,-363.9723C1361.308,-354.62 1300.5436,-339.275 1249,-322 1214.4113,-310.4075 1206.7978,-304.7285 1173,-291 1133.4678,-274.9423 1107.8531,-290.379 1084,-255 1047.6432,-201.0754 1108.6311,-129.3503 1145.3393,-93.6473"/>
-<polygon fill="#191970" stroke="#191970" points="1147.9225,-96.0208 1152.7601,-86.5948 1143.1003,-90.9467 1147.9225,-96.0208"/>
+<path fill="none" stroke="#191970" d="M2186.4248,-363.996C2138.4794,-349.2066 2054.2732,-315.8543 2012,-255 1977.9768,-206.022 1969.076,-134.2071 1966.7762,-96.5908"/>
+<polygon fill="#191970" stroke="#191970" points="1970.267,-96.3178 1966.2704,-86.5057 1963.2758,-96.6686 1970.267,-96.3178"/>
 </g>
 <!-- Node26&#45;&gt;Node14 -->
-<g id="edge75" class="edge">
+<g id="edge77" class="edge">
 <title>Node26&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1403.5656,-363.9841C1393.0373,-361.7255 1381.6309,-359.5306 1371,-358 1263.8093,-342.5674 978.8982,-375.9548 885,-322 802.128,-274.381 791.0877,-141.8032 789.9571,-91.3436"/>
-<polygon fill="#191970" stroke="#191970" points="793.4546,-91.0863 789.844,-81.1257 786.455,-91.1639 793.4546,-91.0863"/>
+<path fill="none" stroke="#191970" d="M2262.955,-363.986C2271.8612,-361.9671 2281.2391,-359.8792 2290,-358 2369.3439,-340.9813 2395.6286,-356.6658 2469,-322 2488.707,-312.689 2487.176,-300.0594 2507,-291 2591.5342,-252.3686 2629.6031,-296.029 2713,-255 2757.88,-232.9202 2758.006,-211.5297 2796,-179 2826.2644,-153.0882 2830.8049,-141.6725 2866,-123 2909.4816,-99.9311 2964.9511,-85.3508 2999.476,-77.8079"/>
+<polygon fill="#191970" stroke="#191970" points="3000.4226,-81.1852 3009.4817,-75.6913 2998.9738,-74.3367 3000.4226,-81.1852"/>
 </g>
 <!-- Node26&#45;&gt;Node15 -->
-<g id="edge73" class="edge">
+<g id="edge75" class="edge">
 <title>Node26&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1464.6502,-363.8379C1485.6863,-354.1577 1518.277,-338.3792 1545,-322 1674.0164,-242.9225 1816.3998,-129.2831 1867.4576,-87.6068"/>
-<polygon fill="#191970" stroke="#191970" points="1869.8629,-90.1609 1875.3816,-81.1168 1865.4275,-84.7454 1869.8629,-90.1609"/>
+<path fill="none" stroke="#191970" d="M2180.3055,-363.986C2143.1471,-354.9568 2087.596,-340.0957 2041,-322 2012.2816,-310.8471 2006.9736,-303.9079 1979,-291 1883.8836,-247.1103 1855.5319,-246.1722 1762,-199 1689.4701,-162.42 1607.95,-111.4497 1569.0545,-86.485"/>
+<polygon fill="#191970" stroke="#191970" points="1570.85,-83.4782 1560.5484,-81.003 1567.058,-89.3622 1570.85,-83.4782"/>
 </g>
 <!-- Node26&#45;&gt;Node16 -->
-<g id="edge76" class="edge">
+<g id="edge78" class="edge">
 <title>Node26&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1490.1839,-363.9683C1561.0674,-349.6172 1687.3085,-323.9213 1692,-322 1715.5012,-312.3755 1717.8379,-303.4007 1740,-291 1761.1096,-279.1883 1785.7282,-267.4301 1804.7496,-258.7583"/>
-<polygon fill="#191970" stroke="#191970" points="1806.1933,-261.9468 1813.8674,-254.642 1803.313,-255.5668 1806.1933,-261.9468"/>
+<path fill="none" stroke="#191970" d="M2175.1966,-363.9533C2165.2352,-361.9566 2154.7742,-359.8859 2145,-358 2058.4648,-341.3035 2031.0502,-356.6117 1950,-322 1928.3332,-312.7474 1929.16,-298.9997 1907,-291 1846.5982,-269.195 1395.6749,-250.9871 1267.7455,-246.2187"/>
+<polygon fill="#191970" stroke="#191970" points="1267.8605,-242.7206 1257.738,-245.8486 1267.6018,-249.7159 1267.8605,-242.7206"/>
 </g>
 <!-- Node26&#45;&gt;Node24 -->
-<g id="edge68" class="edge">
+<g id="edge70" class="edge">
 <title>Node26&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1505.599,-370.8722C1654.0503,-364.4467 2039.0506,-346.6458 2360,-322 2385.9998,-320.0035 2414.3484,-317.3828 2439.8136,-314.8657"/>
-<polygon fill="#191970" stroke="#191970" points="2440.1867,-318.3459 2449.7899,-313.8704 2439.4918,-311.3805 2440.1867,-318.3459"/>
+<path fill="none" stroke="#191970" d="M2176.4631,-363.978C2166.135,-361.9174 2155.2035,-359.8143 2145,-358 2041.8338,-339.6562 2015.2923,-339.62 1912,-322 1904.3798,-320.7001 1896.3771,-319.2781 1888.4728,-317.8393"/>
+<polygon fill="#191970" stroke="#191970" points="1889.0174,-314.3809 1878.5495,-316.0159 1887.7522,-321.2656 1889.0174,-314.3809"/>
 </g>
 <!-- Node27 -->
 <g id="node28" class="node">
 <title>Node27</title>
 <g id="a_node28"><a xlink:href="optional_8h.html" target="_top" xlink:title="Runtime Optional container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="484,-291.5 484,-321.5 610,-321.5 610,-291.5 484,-291.5"/>
-<text text-anchor="start" x="492" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="547" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2842,-291.5 2842,-321.5 2968,-321.5 2968,-291.5 2842,-291.5"/>
+<text text-anchor="start" x="2850" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="2905" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node27 -->
 <g id="edge47" class="edge">
 <title>Node26&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1403.9529,-363.9818C1393.3206,-361.6951 1381.7662,-359.4868 1371,-358 1297.8035,-347.8917 807.8994,-320.6536 620.0885,-310.44"/>
-<polygon fill="#191970" stroke="#191970" points="620.2085,-306.9414 610.0333,-309.8937 619.8287,-313.9311 620.2085,-306.9414"/>
+<path fill="none" stroke="#191970" d="M2284.8493,-368.8922C2390.4124,-360.9197 2609.9499,-343.3399 2795,-322 2806.8995,-320.6278 2819.4998,-319.009 2831.7604,-317.3441"/>
+<polygon fill="#191970" stroke="#191970" points="2832.528,-320.7715 2841.9567,-315.9392 2831.5725,-313.837 2832.528,-320.7715"/>
 </g>
 <!-- Node26&#45;&gt;Node28 -->
 <g id="edge50" class="edge">
 <title>Node26&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1402.4005,-363.9644C1392.1906,-361.7861 1381.2272,-359.6343 1371,-358 1223.3176,-334.4005 1181.7013,-345.6087 1030.279,-322.0937"/>
-<polygon fill="#191970" stroke="#191970" points="1030.5674,-318.596 1020.144,-320.4939 1029.4759,-325.5104 1030.5674,-318.596"/>
+<path fill="none" stroke="#191970" d="M2284.7806,-367.8006C2366.5485,-359.9689 2514.0989,-344.4422 2649.6915,-322.1291"/>
+<polygon fill="#191970" stroke="#191970" points="2650.5566,-325.5334 2659.8474,-320.4412 2649.4089,-318.6282 2650.5566,-325.5334"/>
 </g>
 <!-- Node29 -->
 <g id="node30" class="node">
 <title>Node29</title>
 <g id="a_node30"><a xlink:href="string_8h.html" target="_top" xlink:title="Runtime String container types. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1410,-291.5 1410,-321.5 1536,-321.5 1536,-291.5 1410,-291.5"/>
-<text text-anchor="start" x="1418" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1473" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2334,-291.5 2334,-321.5 2460,-321.5 2460,-291.5 2334,-291.5"/>
+<text text-anchor="start" x="2342" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="2397" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node29 -->
 <g id="edge54" class="edge">
 <title>Node26&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M1447.2983,-363.9005C1451.1715,-355.2503 1457.0311,-342.164 1462.1777,-330.6699"/>
-<polygon fill="#191970" stroke="#191970" points="1465.3815,-332.079 1466.2738,-321.5218 1458.9927,-329.2183 1465.3815,-332.079"/>
+<path fill="none" stroke="#191970" d="M2247.0732,-363.9005C2273.395,-353.8231 2315.443,-337.7247 2348.32,-325.1375"/>
+<polygon fill="#191970" stroke="#191970" points="2349.6765,-328.3659 2357.764,-321.5218 2347.1737,-321.8287 2349.6765,-328.3659"/>
 </g>
 <!-- Node33 -->
-<g id="node32" class="node">
+<g id="node34" class="node">
 <title>Node33</title>
-<g id="a_node32"><a xlink:href="serializer_8h.html" target="_top" xlink:title="Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...">
-<polygon fill="#ffffff" stroke="#ff0000" points="1258,-297 1258,-316 1392,-316 1392,-297 1258,-297"/>
-<text text-anchor="middle" x="1325" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/serializer.h</text>
+<g id="a_node34"><a xlink:href="serializer_8h.html" target="_top" xlink:title="Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...">
+<polygon fill="#ffffff" stroke="#ff0000" points="2106,-297 2106,-316 2240,-316 2240,-297 2106,-297"/>
+<text text-anchor="middle" x="2173" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/serializer.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node33 -->
-<g id="edge70" class="edge">
+<g id="edge72" class="edge">
 <title>Node26&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1421.1929,-363.9005C1399.5037,-352.8057 1366.8341,-334.4128 1345.5874,-321.4424"/>
-<polygon fill="#191970" stroke="#191970" points="1347.3169,-318.3963 1336.978,-316.0817 1343.6169,-324.3385 1347.3169,-318.3963"/>
+<path fill="none" stroke="#191970" d="M2210.0789,-363.9005C2200.6902,-353.751 2188.4022,-337.4941 2180.3369,-324.8679"/>
+<polygon fill="#191970" stroke="#191970" points="2183.2308,-322.8867 2175.1103,-316.0817 2177.2147,-326.4654 2183.2308,-322.8867"/>
 </g>
 <!-- Node26&#45;&gt;Node36 -->
-<g id="edge74" class="edge">
+<g id="edge76" class="edge">
 <title>Node26&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1496.4323,-363.9584C1554.0751,-353.654 1648.5545,-336.7333 1730,-322 1743.7621,-319.5105 1758.7743,-316.7784 1772.3006,-314.3106"/>
-<polygon fill="#191970" stroke="#191970" points="1773.1006,-317.7225 1782.3094,-312.4835 1771.8434,-310.8364 1773.1006,-317.7225"/>
+<path fill="none" stroke="#191970" d="M2159.1917,-367.7087C2123.1616,-364.5262 2077.0645,-360.6905 2036,-358 1701.2249,-336.066 1617.032,-339.5789 1282,-322 1201.2614,-317.7637 1107.0271,-312.2441 1053.6474,-309.0638"/>
+<polygon fill="#191970" stroke="#191970" points="1053.7231,-305.5622 1043.5323,-308.46 1053.3059,-312.5497 1053.7231,-305.5622"/>
 </g>
 <!-- Node27&#45;&gt;Node14 -->
 <g id="edge48" class="edge">
 <title>Node27&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M562.5704,-291.4422C605.8527,-249.5848 727.3947,-132.0442 772.6945,-88.2358"/>
-<polygon fill="#191970" stroke="#191970" points="775.2407,-90.6423 779.996,-81.1746 770.3745,-85.6104 775.2407,-90.6423"/>
+<path fill="none" stroke="#191970" d="M2939.7563,-291.4246C2973.9117,-274.4825 3024.0116,-243.4144 3046,-199 3063.4541,-163.7445 3050.1519,-116.4305 3040.2301,-90.4897"/>
+<polygon fill="#191970" stroke="#191970" points="3043.4125,-89.0236 3036.4109,-81.072 3036.9256,-91.6544 3043.4125,-89.0236"/>
 </g>
 <!-- Node27&#45;&gt;Node20 -->
 <g id="edge49" class="edge">
 <title>Node27&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M610.269,-299.86C732.34,-287.0488 996.9822,-259.2749 1095.2586,-248.9609"/>
-<polygon fill="#191970" stroke="#191970" points="1095.7437,-252.4293 1105.3237,-247.9046 1095.013,-245.4676 1095.7437,-252.4293"/>
+<path fill="none" stroke="#191970" d="M2843.5985,-291.4554C2796.0685,-279.8096 2731.7715,-264.0556 2691.6029,-254.2135"/>
+<polygon fill="#191970" stroke="#191970" points="2692.2342,-250.7647 2681.6885,-251.7842 2690.5682,-257.5636 2692.2342,-250.7647"/>
 </g>
 <!-- Node28&#45;&gt;Node14 -->
 <g id="edge51" class="edge">
 <title>Node28&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M907.0677,-291.4011C887.6061,-283.3308 866.5312,-271.5574 852,-255 840.7809,-242.2165 808.5707,-135.0277 795.5554,-90.6361"/>
-<polygon fill="#191970" stroke="#191970" points="798.9117,-89.6432 792.75,-81.0244 792.1921,-91.6045 798.9117,-89.6432"/>
+<path fill="none" stroke="#191970" d="M2778.9063,-291.408C2804.3203,-283.0249 2834.0004,-271.0067 2858,-255 2929.3824,-207.391 2993.1852,-125.5263 3019.3781,-89.4569"/>
+<polygon fill="#191970" stroke="#191970" points="3022.2278,-91.489 3025.2088,-81.3221 3016.5383,-87.411 3022.2278,-91.489"/>
 </g>
 <!-- Node28&#45;&gt;Node16 -->
 <g id="edge52" class="edge">
 <title>Node28&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1020.1184,-292.3328C1023.1139,-291.8429 1026.0857,-291.3943 1029,-291 1182.1686,-270.2752 1669.2703,-251.1571 1802.204,-246.2261"/>
-<polygon fill="#191970" stroke="#191970" points="1802.347,-249.7233 1812.2112,-245.8569 1802.0889,-242.728 1802.347,-249.7233"/>
+<path fill="none" stroke="#191970" d="M2659.8931,-292.2441C2656.8945,-291.7779 2653.9189,-291.3586 2651,-291 2509.917,-273.6667 1469.6901,-250.1603 1267.8652,-245.7345"/>
+<polygon fill="#191970" stroke="#191970" points="1267.6616,-242.2293 1257.5875,-245.5097 1267.5085,-249.2276 1267.6616,-242.2293"/>
 </g>
 <!-- Node28&#45;&gt;Node20 -->
... 122742 lines suppressed ...