You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/06/28 08:02:33 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@d4be49aec62299275565066b56a0555bafc2ccac)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new e9c2a32e1 deploying docs (apache/tvm@d4be49aec62299275565066b56a0555bafc2ccac)
e9c2a32e1 is described below

commit e9c2a32e1ede6aafd5d88e4a35a7569194cd0a24
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Tue Jun 28 08:02:27 2022 +0000

    deploying docs (apache/tvm@d4be49aec62299275565066b56a0555bafc2ccac)
---
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    7 +-
 .../how_to/compile_models/from_paddle.rst.txt      |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   16 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |    8 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 | 4011 +++++++-------------
 .../tune_network_cuda.rst.txt                      |    2 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |   70 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |    4 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   34 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |    8 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    6 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |    2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   12 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |    2 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |   20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   54 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   22 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   44 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       | 3337 +++++++++++++++-
 docs/how_to/compile_models/from_paddle.html        |    2 +-
 docs/how_to/compile_models/from_pytorch.html       |    8 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   32 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   42 +-
 docs/how_to/deploy_models/deploy_prequantized.html |   11 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   38 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   16 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |    8 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    | 4011 +++++++-------------
 .../tune_with_autoscheduler/tune_network_cuda.html |    2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |   70 +-
 .../tune_with_autotvm/sg_execution_times.html      |    4 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   34 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |    8 +-
 .../how_to/work_with_relay/sg_execution_times.html |    6 +-
 docs/how_to/work_with_schedules/intrin_math.html   |    2 +-
 .../work_with_schedules/sg_execution_times.html    |   12 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    4 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    2 +-
 docs/tutorial/autotvm_matmul_x86.html              |   20 +-
 docs/tutorial/autotvm_relay_x86.html               |  258 +-
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   22 +-
 docs/tutorial/tensor_expr_get_started.html         |   44 +-
 121 files changed, 6978 insertions(+), 6117 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index 59fda10e8..bd0ed4b4e 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -114,7 +114,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipad910f60-0d6a-4f9e-be31-d60e12c805a4 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip150fb657-ad5f-41ad-b546-fc435a8b919b from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index 1cbedae70..a62d4ef9e 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -112,7 +112,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<08:06, 89.3kB/s]
      0%|          | 48.0k/41.5M [00:00<05:07, 141kB/s] 
      0%|          | 96.0k/41.5M [00:00<03:38, 199kB/s]
      0%|          | 160k/41.5M [00:00<02:46, 261kB/s] 
      1%|          | 320k/41.5M [00:00<01:28, 487kB/s]
      2%|1         | 648k/41.5M [00:01<00:45, 941kB/s]
      3%|3         | 1.27M/41.5M [00:01<00:23, 1.82MB/s]
      6%|6         | 2.55M/41.5M [00:01<00:11, 3.55MB/s]
     10%|9         | 4.05M/41.5M [00:01<00:07, 5.12MB/s]
     13%|#3        | 5.56M/41.5M [00:01<00:06, 6.19MB/s]
     17%|#7        | 7.07M/41.5M [00:02<00:05, 6.92MB/s]
     21%|##        | 8.58M/41.5M [00:02<00:04, 7.42MB/s]
     24%|##4       | 10.1M/41.5M [00:02<00:04, 7.74MB/s]
     28%|##7       | 11.6M/41.5M [00:02<00:03, 8.00MB/s]
     32%|###1      | 13.1M/41.5M [00:02<00:03, 8.17MB/s]
     35%|###5      | 14.6M/41.5M [00:02<00:03, 8.28MB/s]
     39%|###8      | 16.1M/41.5M [00:03<00
 :03, 8.35MB/s]
     42%|####2     | 17.6M/41.5M [00:03<00:02, 8.42MB/s]
     46%|####6     | 19.1M/41.5M [00:03<00:02, 8.46MB/s]
     50%|####9     | 20.6M/41.5M [00:03<00:02, 8.49MB/s]
     53%|#####3    | 22.1M/41.5M [00:03<00:02, 8.52MB/s]
     57%|#####6    | 23.6M/41.5M [00:04<00:02, 8.52MB/s]
     61%|######    | 25.1M/41.5M [00:04<00:02, 8.54MB/s]
     64%|######4   | 26.6M/41.5M [00:04<00:01, 8.56MB/s]
     68%|######7   | 28.1M/41.5M [00:04<00:01, 8.55MB/s]
     71%|#######1  | 29.6M/41.5M [00:04<00:01, 8.56MB/s]
     75%|#######5  | 31.2M/41.5M [00:04<00:01, 8.56MB/s]
     79%|#######8  | 32.7M/41.5M [00:05<00:01, 8.57MB/s]
     82%|########2 | 34.2M/41.5M [00:05<00:00, 8.56MB/s]
     86%|########5 | 35.7M/41.5M [00:05<00:00, 8.55MB/s]
     90%|########9 | 37.2M/41.5M [00:05<00:00, 8.56MB/s]
     93%|#########3| 38.7M/41.5M [00:05<00:00, 8.56MB/s]
     97%|#########6| 40.2M/41.5M [00:06<00:00, 8.55MB/s]
    100%|##########| 41.5M/41.5M [00:06<00:00, 7.13MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<07:24, 97.9kB/s]
      0%|          | 48.0k/41.5M [00:00<04:40, 155kB/s] 
      0%|          | 64.0k/41.5M [00:00<05:37, 129kB/s]
      0%|          | 152k/41.5M [00:00<02:29, 289kB/s] 
      0%|          | 200k/41.5M [00:00<02:29, 290kB/s]
      1%|          | 256k/41.5M [00:01<02:20, 308kB/s]
      1%|          | 288k/41.5M [00:01<02:39, 270kB/s]
      1%|          | 320k/41.5M [00:01<02:55, 246kB/s]
      1%|          | 352k/41.5M [00:01<03:07, 230kB/s]
      1%|          | 376k/41.5M [00:01<03:31, 204kB/s]
      1%|          | 400k/41.5M [00:01<03:51, 186kB/s]
      1%|          | 424k/41.5M [00:02<04:07, 174kB/s]
      1%|1         | 448k/41.5M [00:02<05:39, 127kB/s]
      1%|1         | 464k/41.5M [00:02<05:59, 120kB/s]
      1%|1         | 496k/41.5M [00:02<05:08, 139kB/s]
      1%|1         | 512k/41.5M [00:03<07:07, 100kB/s]
      1%|1         | 536k/41.5M [00:03<06:26, 111kB/s]
      1
 %|1         | 552k/41.5M [00:03<06:38, 108kB/s]
      1%|1         | 568k/41.5M [00:03<06:49, 105kB/s]
      1%|1         | 584k/41.5M [00:03<08:51, 80.7kB/s]
      1%|1         | 600k/41.5M [00:04<08:26, 84.6kB/s]
      1%|1         | 616k/41.5M [00:04<08:08, 87.8kB/s]
      1%|1         | 632k/41.5M [00:04<09:59, 71.5kB/s]
      2%|1         | 648k/41.5M [00:04<11:20, 62.9kB/s]
      2%|1         | 664k/41.5M [00:05<11:49, 60.4kB/s]
      2%|1         | 672k/41.5M [00:05<12:18, 58.0kB/s]
      2%|1         | 680k/41.5M [00:05<12:46, 55.9kB/s]
      2%|1         | 696k/41.5M [00:05<11:25, 62.5kB/s]
      2%|1         | 704k/41.5M [00:05<12:04, 59.0kB/s]
      2%|1         | 712k/41.5M [00:06<11:53, 60.0kB/s]
      2%|1         | 728k/41.5M [00:06<10:45, 66.2kB/s]
      2%|1         | 736k/41.5M [00:06<11:36, 61.4kB/s]
      2%|1         | 744k/41.5M [00:06<11:31, 61.8kB/s]
      2%|1         | 760k/41.5M [00:06<10:29, 67.8kB/s]
      2%|1         | 768k/41.5M [00:06<10:40, 66.7kB/s
 ]
      2%|1         | 776k/41.5M [00:07<12:27, 57.2kB/s]
      2%|1         | 800k/41.5M [00:07<08:35, 82.9kB/s]
      2%|1         | 816k/41.5M [00:07<08:09, 87.1kB/s]
      2%|1         | 832k/41.5M [00:07<10:12, 69.6kB/s]
      2%|1         | 848k/41.5M [00:07<09:18, 76.4kB/s]
      2%|2         | 864k/41.5M [00:08<10:55, 65.0kB/s]
      2%|2         | 888k/41.5M [00:08<08:30, 83.4kB/s]
      2%|2         | 904k/41.5M [00:08<10:10, 69.7kB/s]
      2%|2         | 920k/41.5M [00:09<11:25, 62.1kB/s]
      2%|2         | 944k/41.5M [00:09<08:56, 79.2kB/s]
      2%|2         | 960k/41.5M [00:09<08:30, 83.3kB/s]
      2%|2         | 976k/41.5M [00:09<08:10, 86.7kB/s]
      2%|2         | 992k/41.5M [00:09<09:57, 71.1kB/s]
      2%|2         | 0.99M/41.5M [00:10<08:02, 88.0kB/s]
      2%|2         | 1.01M/41.5M [00:10<09:45, 72.6kB/s]
      2%|2         | 1.02M/41.5M [00:10<09:35, 73.7kB/s]
      3%|2         | 1.04M/41.5M [00:10<08:56, 79.1kB/s]
      3%|2         | 1.05M/41.5M [00:11
 <09:59, 70.8kB/s]
      3%|2         | 1.06M/41.5M [00:11<10:45, 65.7kB/s]
      3%|2         | 1.08M/41.5M [00:11<09:36, 73.5kB/s]
      3%|2         | 1.09M/41.5M [00:11<08:51, 79.6kB/s]
      3%|2         | 1.11M/41.5M [00:11<08:56, 78.9kB/s]
      3%|2         | 1.12M/41.5M [00:11<09:16, 76.1kB/s]
      3%|2         | 1.14M/41.5M [00:12<07:52, 89.6kB/s]
      3%|2         | 1.16M/41.5M [00:12<07:40, 91.8kB/s]
      3%|2         | 1.17M/41.5M [00:12<07:33, 93.2kB/s]
      3%|2         | 1.19M/41.5M [00:12<10:10, 69.3kB/s]
      3%|2         | 1.21M/41.5M [00:13<08:47, 80.1kB/s]
      3%|2         | 1.23M/41.5M [00:13<11:02, 63.7kB/s]
      3%|2         | 1.24M/41.5M [00:13<12:00, 58.6kB/s]
      3%|3         | 1.26M/41.5M [00:14<12:42, 55.4kB/s]
      3%|3         | 1.27M/41.5M [00:14<17:52, 39.3kB/s]
      3%|3         | 1.28M/41.5M [00:15<19:06, 36.8kB/s]
      3%|3         | 1.30M/41.5M [00:15<17:37, 39.9kB/s]
      3%|3         | 1.30M/41.5M [00:15<17:01, 41.2kB/s]
      3%|3
          | 1.31M/41.5M [00:15<16:29, 42.6kB/s]
      3%|3         | 1.32M/41.5M [00:16<16:00, 43.8kB/s]
      3%|3         | 1.33M/41.5M [00:16<15:37, 44.9kB/s]
      3%|3         | 1.34M/41.5M [00:16<15:18, 45.9kB/s]
      3%|3         | 1.34M/41.5M [00:16<15:03, 46.6kB/s]
      3%|3         | 1.35M/41.5M [00:16<14:52, 47.1kB/s]
      3%|3         | 1.36M/41.5M [00:16<14:44, 47.6kB/s]
      3%|3         | 1.38M/41.5M [00:17<11:21, 61.7kB/s]
      3%|3         | 1.38M/41.5M [00:17<12:05, 58.0kB/s]
      3%|3         | 1.40M/41.5M [00:17<10:05, 69.4kB/s]
      3%|3         | 1.41M/41.5M [00:17<14:18, 48.9kB/s]
      3%|3         | 1.42M/41.5M [00:17<11:30, 60.8kB/s]
      3%|3         | 1.44M/41.5M [00:18<09:57, 70.2kB/s]
      4%|3         | 1.45M/41.5M [00:18<09:02, 77.4kB/s]
      4%|3         | 1.47M/41.5M [00:18<08:25, 83.0kB/s]
      4%|3         | 1.48M/41.5M [00:18<10:18, 67.9kB/s]
      4%|3         | 1.50M/41.5M [00:18<09:19, 75.0kB/s]
      4%|3         | 1.52M/41.5M [00:1
 9<08:39, 80.7kB/s]
      4%|3         | 1.53M/41.5M [00:19<08:12, 85.0kB/s]
      4%|3         | 1.55M/41.5M [00:19<08:27, 82.6kB/s]
      4%|3         | 1.56M/41.5M [00:19<09:40, 72.2kB/s]
      4%|3         | 1.58M/41.5M [00:19<08:55, 78.2kB/s]
      4%|3         | 1.59M/41.5M [00:20<09:28, 73.6kB/s]
      4%|3         | 1.61M/41.5M [00:20<08:47, 79.3kB/s]
      4%|3         | 1.62M/41.5M [00:20<09:21, 74.5kB/s]
      4%|3         | 1.63M/41.5M [00:20<12:45, 54.6kB/s]
      4%|3         | 1.66M/41.5M [00:21<10:21, 67.2kB/s]
      4%|4         | 1.66M/41.5M [00:21<11:01, 63.1kB/s]
      4%|4         | 1.67M/41.5M [00:21<11:39, 59.6kB/s]
      4%|4         | 1.68M/41.5M [00:21<11:34, 60.1kB/s]
      4%|4         | 1.70M/41.5M [00:21<10:27, 66.5kB/s]
      4%|4         | 1.70M/41.5M [00:21<10:37, 65.5kB/s]
      4%|4         | 1.71M/41.5M [00:22<12:12, 57.0kB/s]
      4%|4         | 1.73M/41.5M [00:22<10:09, 68.4kB/s]
      4%|4         | 1.73M/41.5M [00:22<11:04, 62.7kB/s]
      4%|
 4         | 1.74M/41.5M [00:22<11:51, 58.6kB/s]
      4%|4         | 1.75M/41.5M [00:22<12:28, 55.7kB/s]
      4%|4         | 1.77M/41.5M [00:23<10:12, 68.0kB/s]
      4%|4         | 1.77M/41.5M [00:23<11:08, 62.3kB/s]
      4%|4         | 1.79M/41.5M [00:23<09:32, 72.7kB/s]
      4%|4         | 1.80M/41.5M [00:23<13:45, 50.4kB/s]
      4%|4         | 1.81M/41.5M [00:23<11:10, 62.0kB/s]
      4%|4         | 1.83M/41.5M [00:24<09:44, 71.1kB/s]
      4%|4         | 1.84M/41.5M [00:24<13:32, 51.2kB/s]
      4%|4         | 1.86M/41.5M [00:24<09:26, 73.4kB/s]
      5%|4         | 1.88M/41.5M [00:25<13:05, 52.9kB/s]
      5%|4         | 1.89M/41.5M [00:25<11:16, 61.4kB/s]
      5%|4         | 1.90M/41.5M [00:25<11:47, 58.7kB/s]
      5%|4         | 1.91M/41.5M [00:25<12:17, 56.3kB/s]
      5%|4         | 1.91M/41.5M [00:25<12:42, 54.4kB/s]
      5%|4         | 1.93M/41.5M [00:25<11:14, 61.5kB/s]
      5%|4         | 1.94M/41.5M [00:26<11:52, 58.2kB/s]
      5%|4         | 1.95M/41.5M [00:
 26<10:02, 68.8kB/s]
      5%|4         | 1.96M/41.5M [00:26<13:59, 49.3kB/s]
      5%|4         | 1.98M/41.5M [00:26<09:33, 72.2kB/s]
      5%|4         | 2.00M/41.5M [00:27<10:25, 66.2kB/s]
      5%|4         | 2.02M/41.5M [00:27<09:23, 73.5kB/s]
      5%|4         | 2.02M/41.5M [00:27<10:56, 63.0kB/s]
      5%|4         | 2.04M/41.5M [00:27<12:02, 57.3kB/s]
      5%|4         | 2.05M/41.5M [00:28<10:58, 62.8kB/s]
      5%|4         | 2.06M/41.5M [00:28<11:34, 59.5kB/s]
      5%|4         | 2.07M/41.5M [00:28<12:07, 56.8kB/s]
      5%|5         | 2.08M/41.5M [00:28<12:35, 54.7kB/s]
      5%|5         | 2.09M/41.5M [00:28<12:58, 53.1kB/s]
      5%|5         | 2.09M/41.5M [00:28<12:48, 53.8kB/s]
      5%|5         | 2.10M/41.5M [00:29<17:02, 40.4kB/s]
      5%|5         | 2.11M/41.5M [00:29<16:13, 42.4kB/s]
      5%|5         | 2.12M/41.5M [00:29<16:08, 42.6kB/s]
      5%|5         | 2.12M/41.5M [00:29<15:03, 45.7kB/s]
      5%|5         | 2.13M/41.5M [00:29<15:17, 45.0kB/s]
      5%
 |5         | 2.14M/41.5M [00:30<18:36, 36.9kB/s]
      5%|5         | 2.15M/41.5M [00:30<17:16, 39.8kB/s]
      5%|5         | 2.16M/41.5M [00:30<21:02, 32.7kB/s]
      5%|5         | 2.17M/41.5M [00:31<20:44, 33.1kB/s]
      5%|5         | 2.18M/41.5M [00:31<19:05, 36.0kB/s]
      5%|5         | 2.19M/41.5M [00:31<17:47, 38.6kB/s]
      5%|5         | 2.20M/41.5M [00:31<20:37, 33.3kB/s]
      5%|5         | 2.20M/41.5M [00:32<18:48, 36.5kB/s]
      5%|5         | 2.21M/41.5M [00:32<17:27, 39.3kB/s]
      5%|5         | 2.22M/41.5M [00:32<16:29, 41.6kB/s]
      5%|5         | 2.23M/41.5M [00:32<15:47, 43.5kB/s]
      5%|5         | 2.24M/41.5M [00:32<11:48, 58.1kB/s]
      5%|5         | 2.25M/41.5M [00:33<15:49, 43.3kB/s]
      5%|5         | 2.27M/41.5M [00:33<12:12, 56.2kB/s]
      5%|5         | 2.27M/41.5M [00:33<12:37, 54.3kB/s]
      5%|5         | 2.28M/41.5M [00:33<12:58, 52.8kB/s]
      6%|5         | 2.29M/41.5M [00:33<13:15, 51.6kB/s]
      6%|5         | 2.30M/41.5M [00
 :33<10:36, 64.5kB/s]
      6%|5         | 2.31M/41.5M [00:34<11:24, 60.0kB/s]
      6%|5         | 2.32M/41.5M [00:34<12:04, 56.7kB/s]
      6%|5         | 2.34M/41.5M [00:34<09:58, 68.6kB/s]
      6%|5         | 2.34M/41.5M [00:34<10:54, 62.7kB/s]
      6%|5         | 2.35M/41.5M [00:34<11:40, 58.6kB/s]
      6%|5         | 2.37M/41.5M [00:34<09:45, 70.1kB/s]
      6%|5         | 2.38M/41.5M [00:35<08:44, 78.2kB/s]
      6%|5         | 2.40M/41.5M [00:35<08:08, 83.9kB/s]
      6%|5         | 2.41M/41.5M [00:35<07:45, 88.0kB/s]
      6%|5         | 2.43M/41.5M [00:35<07:31, 90.8kB/s]
      6%|5         | 2.45M/41.5M [00:35<07:21, 92.7kB/s]
      6%|5         | 2.47M/41.5M [00:36<08:09, 83.6kB/s]
      6%|6         | 2.49M/41.5M [00:36<06:52, 99.0kB/s]
      6%|6         | 2.51M/41.5M [00:36<08:43, 78.1kB/s]
      6%|6         | 2.53M/41.5M [00:36<07:16, 93.6kB/s]
      6%|6         | 2.55M/41.5M [00:36<07:11, 94.5kB/s]
      6%|6         | 2.56M/41.5M [00:37<07:08, 95.3kB/s]
      6
 %|6         | 2.58M/41.5M [00:37<07:33, 90.1kB/s]
      6%|6         | 2.59M/41.5M [00:37<07:23, 92.0kB/s]
      6%|6         | 2.61M/41.5M [00:37<08:48, 77.2kB/s]
      6%|6         | 2.62M/41.5M [00:37<08:44, 77.6kB/s]
      6%|6         | 2.63M/41.5M [00:38<09:38, 70.4kB/s]
      6%|6         | 2.65M/41.5M [00:38<08:16, 82.1kB/s]
      6%|6         | 2.66M/41.5M [00:38<08:19, 81.5kB/s]
      6%|6         | 2.68M/41.5M [00:38<10:05, 67.2kB/s]
      6%|6         | 2.70M/41.5M [00:38<09:06, 74.4kB/s]
      7%|6         | 2.70M/41.5M [00:39<09:27, 71.6kB/s]
      7%|6         | 2.71M/41.5M [00:39<11:42, 57.9kB/s]
      7%|6         | 2.73M/41.5M [00:39<11:20, 59.8kB/s]
      7%|6         | 2.73M/41.5M [00:39<11:52, 57.0kB/s]
      7%|6         | 2.74M/41.5M [00:39<12:20, 54.9kB/s]
      7%|6         | 2.76M/41.5M [00:40<12:57, 52.2kB/s]
      7%|6         | 2.77M/41.5M [00:40<13:10, 51.4kB/s]
      7%|6         | 2.78M/41.5M [00:40<10:45, 62.9kB/s]
      7%|6         | 2.79M/41.5M [0
 0:40<11:26, 59.1kB/s]
      7%|6         | 2.80M/41.5M [00:40<09:43, 69.6kB/s]
      7%|6         | 2.81M/41.5M [00:41<13:35, 49.7kB/s]
      7%|6         | 2.83M/41.5M [00:41<11:03, 61.1kB/s]
      7%|6         | 2.84M/41.5M [00:41<11:39, 57.9kB/s]
      7%|6         | 2.85M/41.5M [00:41<09:52, 68.4kB/s]
      7%|6         | 2.86M/41.5M [00:41<10:44, 62.9kB/s]
      7%|6         | 2.88M/41.5M [00:42<09:17, 72.6kB/s]
      7%|6         | 2.88M/41.5M [00:42<10:15, 65.7kB/s]
      7%|6         | 2.89M/41.5M [00:42<11:06, 60.8kB/s]
      7%|7         | 2.91M/41.5M [00:42<09:26, 71.5kB/s]
      7%|7         | 2.92M/41.5M [00:42<08:31, 79.1kB/s]
      7%|7         | 2.93M/41.5M [00:42<09:36, 70.1kB/s]
      7%|7         | 2.95M/41.5M [00:43<08:37, 78.2kB/s]
      7%|7         | 2.96M/41.5M [00:43<10:25, 64.6kB/s]
      7%|7         | 2.98M/41.5M [00:43<08:24, 80.0kB/s]
      7%|7         | 3.00M/41.5M [00:43<07:58, 84.3kB/s]
      7%|7         | 3.02M/41.5M [00:44<07:39, 87.7kB/s]
      
 7%|7         | 3.03M/41.5M [00:44<09:28, 70.9kB/s]
      7%|7         | 3.05M/41.5M [00:44<08:42, 77.1kB/s]
      7%|7         | 3.06M/41.5M [00:44<08:10, 82.2kB/s]
      7%|7         | 3.08M/41.5M [00:44<07:47, 86.2kB/s]
      7%|7         | 3.09M/41.5M [00:45<07:31, 89.2kB/s]
      7%|7         | 3.11M/41.5M [00:45<07:19, 91.5kB/s]
      8%|7         | 3.12M/41.5M [00:45<07:11, 93.2kB/s]
      8%|7         | 3.14M/41.5M [00:45<07:06, 94.4kB/s]
      8%|7         | 3.16M/41.5M [00:45<07:01, 95.3kB/s]
      8%|7         | 3.17M/41.5M [00:45<06:59, 95.9kB/s]
      8%|7         | 3.19M/41.5M [00:46<06:57, 96.3kB/s]
      8%|7         | 3.20M/41.5M [00:46<06:55, 96.6kB/s]
      8%|7         | 3.22M/41.5M [00:46<06:54, 96.8kB/s]
      8%|7         | 3.24M/41.5M [00:46<05:59, 111kB/s] 
      8%|7         | 3.26M/41.5M [00:46<08:05, 82.6kB/s]
      8%|7         | 3.29M/41.5M [00:47<07:33, 88.3kB/s]
      8%|7         | 3.30M/41.5M [00:47<07:47, 85.6kB/s]
      8%|8         | 3.32M/41.5M [
 00:47<08:05, 82.4kB/s]
      8%|8         | 3.34M/41.5M [00:47<09:35, 69.6kB/s]
      8%|8         | 3.34M/41.5M [00:48<10:14, 65.1kB/s]
      8%|8         | 3.35M/41.5M [00:48<10:52, 61.2kB/s]
      8%|8         | 3.36M/41.5M [00:48<11:28, 58.1kB/s]
      8%|8         | 3.37M/41.5M [00:48<10:59, 60.6kB/s]
      8%|8         | 3.38M/41.5M [00:48<10:11, 65.3kB/s]
      8%|8         | 3.39M/41.5M [00:49<10:57, 60.7kB/s]
      8%|8         | 3.41M/41.5M [00:49<09:21, 71.1kB/s]
      8%|8         | 3.41M/41.5M [00:49<10:17, 64.6kB/s]
      8%|8         | 3.43M/41.5M [00:49<08:58, 74.1kB/s]
      8%|8         | 3.45M/41.5M [00:49<08:12, 80.9kB/s]
      8%|8         | 3.45M/41.5M [00:49<09:18, 71.4kB/s]
      8%|8         | 3.47M/41.5M [00:50<08:24, 79.0kB/s]
      8%|8         | 3.48M/41.5M [00:50<07:51, 84.5kB/s]
      8%|8         | 3.51M/41.5M [00:50<06:27, 103kB/s] 
      8%|8         | 3.52M/41.5M [00:50<06:33, 101kB/s]
      9%|8         | 3.54M/41.5M [00:50<06:38, 100kB/s]
      9
 %|8         | 3.55M/41.5M [00:50<06:42, 98.8kB/s]
      9%|8         | 3.57M/41.5M [00:51<06:43, 98.6kB/s]
      9%|8         | 3.59M/41.5M [00:51<06:45, 98.0kB/s]
      9%|8         | 3.60M/41.5M [00:51<08:47, 75.4kB/s]
      9%|8         | 3.62M/41.5M [00:51<08:12, 80.7kB/s]
      9%|8         | 3.63M/41.5M [00:52<09:48, 67.5kB/s]
      9%|8         | 3.65M/41.5M [00:52<08:53, 74.3kB/s]
      9%|8         | 3.66M/41.5M [00:52<08:16, 79.9kB/s]
      9%|8         | 3.68M/41.5M [00:52<07:50, 84.3kB/s]
      9%|8         | 3.70M/41.5M [00:52<09:33, 69.1kB/s]
      9%|8         | 3.70M/41.5M [00:53<12:40, 52.1kB/s]
      9%|8         | 3.71M/41.5M [00:53<19:33, 33.7kB/s]
      9%|9         | 3.74M/41.5M [00:53<10:54, 60.5kB/s]
      9%|9         | 3.76M/41.5M [00:54<09:45, 67.6kB/s]
      9%|9         | 3.77M/41.5M [00:54<10:49, 60.9kB/s]
      9%|9         | 3.79M/41.5M [00:54<09:39, 68.2kB/s]
      9%|9         | 3.80M/41.5M [00:54<08:48, 74.7kB/s]
      9%|9         | 3.82M/41.5M [0
 0:55<10:11, 64.5kB/s]
      9%|9         | 3.84M/41.5M [00:55<07:59, 82.3kB/s]
      9%|9         | 3.86M/41.5M [00:55<07:39, 85.8kB/s]
      9%|9         | 3.88M/41.5M [00:56<17:09, 38.3kB/s]
      9%|9         | 3.92M/41.5M [00:56<08:50, 74.3kB/s]
      9%|9         | 3.94M/41.5M [00:57<09:47, 67.1kB/s]
     10%|9         | 3.95M/41.5M [00:57<10:37, 61.7kB/s]
     10%|9         | 3.98M/41.5M [00:57<08:36, 76.2kB/s]
     10%|9         | 3.99M/41.5M [00:57<09:45, 67.1kB/s]
     10%|9         | 4.01M/41.5M [00:58<11:06, 59.0kB/s]
     10%|9         | 4.02M/41.5M [00:58<10:57, 59.7kB/s]
     10%|9         | 4.02M/41.5M [00:58<11:59, 54.6kB/s]
     10%|9         | 4.03M/41.5M [00:58<11:37, 56.3kB/s]
     10%|9         | 4.05M/41.5M [00:58<10:25, 62.8kB/s]
     10%|9         | 4.05M/41.5M [00:59<11:02, 59.2kB/s]
     10%|9         | 4.06M/41.5M [00:59<10:50, 60.3kB/s]
     10%|9         | 4.08M/41.5M [00:59<09:51, 66.3kB/s]
     10%|9         | 4.09M/41.5M [00:59<11:08, 58.6kB/s]
     1
 0%|9         | 4.11M/41.5M [00:59<09:05, 71.9kB/s]
     10%|9         | 4.12M/41.5M [01:00<08:50, 73.9kB/s]
     10%|9         | 4.13M/41.5M [01:00<11:37, 56.1kB/s]
     10%|#         | 4.16M/41.5M [01:00<08:54, 73.2kB/s]
     10%|#         | 4.16M/41.5M [01:00<09:42, 67.2kB/s]
     10%|#         | 4.17M/41.5M [01:01<13:08, 49.7kB/s]
     10%|#         | 4.20M/41.5M [01:01<09:07, 71.4kB/s]
     10%|#         | 4.21M/41.5M [01:01<10:25, 62.5kB/s]
     10%|#         | 4.22M/41.5M [01:01<10:22, 62.8kB/s]
     10%|#         | 4.23M/41.5M [01:01<09:40, 67.3kB/s]
     10%|#         | 4.24M/41.5M [01:02<10:23, 62.6kB/s]
     10%|#         | 4.25M/41.5M [01:02<10:19, 63.0kB/s]
     10%|#         | 4.27M/41.5M [01:02<09:33, 68.1kB/s]
     10%|#         | 4.27M/41.5M [01:02<09:40, 67.3kB/s]
     10%|#         | 4.29M/41.5M [01:02<08:31, 76.2kB/s]
     10%|#         | 4.30M/41.5M [01:02<07:52, 82.5kB/s]
     10%|#         | 4.32M/41.5M [01:03<07:28, 87.0kB/s]
     10%|#         | 4.34M/41.5M [
 01:03<07:12, 90.1kB/s]
     10%|#         | 4.35M/41.5M [01:03<07:02, 92.2kB/s]
     11%|#         | 4.37M/41.5M [01:03<06:55, 93.8kB/s]
     11%|#         | 4.38M/41.5M [01:03<06:50, 94.8kB/s]
     11%|#         | 4.40M/41.5M [01:03<06:46, 95.6kB/s]
     11%|#         | 4.41M/41.5M [01:04<06:44, 96.1kB/s]
     11%|#         | 4.44M/41.5M [01:04<05:49, 111kB/s] 
     11%|#         | 4.45M/41.5M [01:04<06:03, 107kB/s]
     11%|#         | 4.48M/41.5M [01:04<05:27, 119kB/s]
     11%|#         | 4.50M/41.5M [01:04<05:05, 127kB/s]
     11%|#         | 4.52M/41.5M [01:04<04:52, 132kB/s]
     11%|#         | 4.55M/41.5M [01:05<06:08, 105kB/s]
     11%|#1        | 4.58M/41.5M [01:05<05:04, 127kB/s]
     11%|#1        | 4.59M/41.5M [01:05<06:48, 94.7kB/s]
     11%|#1        | 4.62M/41.5M [01:06<06:44, 95.7kB/s]
     11%|#1        | 4.65M/41.5M [01:06<06:04, 106kB/s] 
     11%|#1        | 4.66M/41.5M [01:06<06:32, 98.3kB/s]
     11%|#1        | 4.68M/41.5M [01:06<07:43, 83.3kB/s]
     11%|#1
         | 4.70M/41.5M [01:06<07:51, 81.9kB/s]
     11%|#1        | 4.71M/41.5M [01:07<09:16, 69.3kB/s]
     11%|#1        | 4.73M/41.5M [01:07<08:32, 75.3kB/s]
     11%|#1        | 4.74M/41.5M [01:07<09:23, 68.4kB/s]
     11%|#1        | 4.76M/41.5M [01:08<10:28, 61.3kB/s]
     11%|#1        | 4.77M/41.5M [01:08<10:56, 58.7kB/s]
     12%|#1        | 4.77M/41.5M [01:08<12:00, 53.5kB/s]
     12%|#1        | 4.79M/41.5M [01:08<11:51, 54.1kB/s]
     12%|#1        | 4.80M/41.5M [01:08<12:07, 52.9kB/s]
     12%|#1        | 4.80M/41.5M [01:09<12:21, 51.9kB/s]
     12%|#1        | 4.81M/41.5M [01:09<12:33, 51.0kB/s]
     12%|#1        | 4.82M/41.5M [01:09<12:42, 50.4kB/s]
     12%|#1        | 4.83M/41.5M [01:09<12:50, 49.9kB/s]
     12%|#1        | 4.84M/41.5M [01:09<12:55, 49.6kB/s]
     12%|#1        | 4.84M/41.5M [01:10<16:44, 38.2kB/s]
     12%|#1        | 4.86M/41.5M [01:10<15:08, 42.3kB/s]
     12%|#1        | 4.87M/41.5M [01:10<21:02, 30.4kB/s]
     12%|#1        | 4.88M/41.5M [01:11
 <15:48, 40.5kB/s]
     12%|#1        | 4.89M/41.5M [01:11<17:24, 36.8kB/s]
     12%|#1        | 4.90M/41.5M [01:11<17:11, 37.2kB/s]
     12%|#1        | 4.91M/41.5M [01:11<18:39, 34.3kB/s]
     12%|#1        | 4.92M/41.5M [01:12<14:14, 44.9kB/s]
     12%|#1        | 4.93M/41.5M [01:12<13:58, 45.7kB/s]
     12%|#1        | 4.94M/41.5M [01:12<20:18, 31.4kB/s]
     12%|#1        | 4.95M/41.5M [01:13<13:53, 46.0kB/s]
     12%|#1        | 4.96M/41.5M [01:13<13:42, 46.5kB/s]
     12%|#1        | 4.97M/41.5M [01:13<16:47, 38.0kB/s]
     12%|#2        | 4.98M/41.5M [01:13<13:56, 45.7kB/s]
     12%|#2        | 4.99M/41.5M [01:14<16:20, 39.0kB/s]
     12%|#2        | 5.00M/41.5M [01:14<19:59, 31.9kB/s]
     12%|#2        | 5.02M/41.5M [01:14<14:36, 43.6kB/s]
     12%|#2        | 5.02M/41.5M [01:14<14:16, 44.7kB/s]
     12%|#2        | 5.03M/41.5M [01:14<13:58, 45.6kB/s]
     12%|#2        | 5.04M/41.5M [01:15<13:44, 46.3kB/s]
     12%|#2        | 5.05M/41.5M [01:15<13:33, 47.0kB/s]
     12%|#
 2        | 5.05M/41.5M [01:15<13:25, 47.4kB/s]
     12%|#2        | 5.06M/41.5M [01:15<13:19, 47.8kB/s]
     12%|#2        | 5.08M/41.5M [01:15<11:04, 57.5kB/s]
     12%|#2        | 5.09M/41.5M [01:16<10:42, 59.5kB/s]
     12%|#2        | 5.09M/41.5M [01:16<11:18, 56.3kB/s]
     12%|#2        | 5.11M/41.5M [01:16<12:02, 52.8kB/s]
     12%|#2        | 5.12M/41.5M [01:16<12:25, 51.1kB/s]
     12%|#2        | 5.14M/41.5M [01:17<10:58, 57.9kB/s]
     12%|#2        | 5.15M/41.5M [01:17<11:23, 55.8kB/s]
     12%|#2        | 5.16M/41.5M [01:17<14:39, 43.3kB/s]
     12%|#2        | 5.17M/41.5M [01:17<14:01, 45.2kB/s]
     13%|#2        | 5.19M/41.5M [01:18<15:59, 39.7kB/s]
     13%|#2        | 5.20M/41.5M [01:18<15:23, 41.2kB/s]
     13%|#2        | 5.20M/41.5M [01:19<20:43, 30.6kB/s]
     13%|#2        | 5.22M/41.5M [01:19<17:42, 35.8kB/s]
     13%|#2        | 5.23M/41.5M [01:19<16:40, 38.0kB/s]
     13%|#2        | 5.23M/41.5M [01:19<18:54, 33.5kB/s]
     13%|#2        | 5.24M/41.5M [01:2
 0<17:24, 36.4kB/s]
     13%|#2        | 5.25M/41.5M [01:20<16:14, 39.0kB/s]
     13%|#2        | 5.26M/41.5M [01:20<15:20, 41.3kB/s]
     13%|#2        | 5.27M/41.5M [01:20<22:01, 28.7kB/s]
     13%|#2        | 5.28M/41.5M [01:21<15:05, 41.9kB/s]
     13%|#2        | 5.29M/41.5M [01:21<14:34, 43.4kB/s]
     13%|#2        | 5.30M/41.5M [01:21<14:10, 44.6kB/s]
     13%|#2        | 5.30M/41.5M [01:21<13:51, 45.6kB/s]
     13%|#2        | 5.31M/41.5M [01:21<17:13, 36.7kB/s]
     13%|#2        | 5.32M/41.5M [01:22<16:00, 39.5kB/s]
     13%|#2        | 5.33M/41.5M [01:22<15:08, 41.7kB/s]
     13%|#2        | 5.34M/41.5M [01:22<14:30, 43.5kB/s]
     13%|#2        | 5.34M/41.5M [01:22<14:03, 44.9kB/s]
     13%|#2        | 5.35M/41.5M [01:22<13:44, 46.0kB/s]
     13%|#2        | 5.36M/41.5M [01:22<13:30, 46.7kB/s]
     13%|#2        | 5.37M/41.5M [01:23<13:20, 47.3kB/s]
     13%|#2        | 5.38M/41.5M [01:23<13:13, 47.7kB/s]
     13%|#2        | 5.39M/41.5M [01:23<10:07, 62.3kB/s]
     13%|
 #3        | 5.40M/41.5M [01:23<10:49, 58.3kB/s]
     13%|#3        | 5.41M/41.5M [01:23<11:22, 55.4kB/s]
     13%|#3        | 5.42M/41.5M [01:24<12:02, 52.3kB/s]
     13%|#3        | 5.44M/41.5M [01:24<09:56, 63.4kB/s]
     13%|#3        | 5.45M/41.5M [01:24<09:16, 67.9kB/s]
     13%|#3        | 5.46M/41.5M [01:24<09:22, 67.2kB/s]
     13%|#3        | 5.48M/41.5M [01:24<10:39, 59.0kB/s]
     13%|#3        | 5.49M/41.5M [01:25<09:13, 68.2kB/s]
     13%|#3        | 5.51M/41.5M [01:25<08:50, 71.1kB/s]
     13%|#3        | 5.52M/41.5M [01:25<09:00, 69.8kB/s]
     13%|#3        | 5.53M/41.5M [01:25<08:40, 72.5kB/s]
     13%|#3        | 5.55M/41.5M [01:25<07:55, 79.2kB/s]
     13%|#3        | 5.55M/41.5M [01:26<08:53, 70.7kB/s]
     13%|#3        | 5.57M/41.5M [01:26<10:18, 60.9kB/s]
     13%|#3        | 5.59M/41.5M [01:26<09:31, 65.9kB/s]
     14%|#3        | 5.60M/41.5M [01:26<08:31, 73.6kB/s]
     14%|#3        | 5.61M/41.5M [01:26<08:45, 71.6kB/s]
     14%|#3        | 5.62M/41.5M [01:
 27<10:44, 58.4kB/s]
     14%|#3        | 5.64M/41.5M [01:27<10:57, 57.2kB/s]
     14%|#3        | 5.65M/41.5M [01:27<11:19, 55.3kB/s]
     14%|#3        | 5.66M/41.5M [01:27<09:35, 65.3kB/s]
     14%|#3        | 5.67M/41.5M [01:28<10:14, 61.1kB/s]
     14%|#3        | 5.68M/41.5M [01:28<10:49, 57.8kB/s]
     14%|#3        | 5.69M/41.5M [01:28<11:19, 55.3kB/s]
     14%|#3        | 5.70M/41.5M [01:28<11:39, 53.7kB/s]
     14%|#3        | 5.70M/41.5M [01:28<15:25, 40.5kB/s]
     14%|#3        | 5.73M/41.5M [01:29<10:05, 61.9kB/s]
     14%|#3        | 5.73M/41.5M [01:29<10:39, 58.7kB/s]
     14%|#3        | 5.74M/41.5M [01:29<10:24, 60.0kB/s]
     14%|#3        | 5.75M/41.5M [01:29<10:59, 56.8kB/s]
     14%|#3        | 5.76M/41.5M [01:29<11:28, 54.4kB/s]
     14%|#3        | 5.77M/41.5M [01:29<11:50, 52.8kB/s]
     14%|#3        | 5.77M/41.5M [01:30<13:00, 48.0kB/s]
     14%|#3        | 5.78M/41.5M [01:30<16:35, 37.6kB/s]
     14%|#3        | 5.79M/41.5M [01:30<15:30, 40.2kB/s]
     14%
 |#3        | 5.80M/41.5M [01:30<18:27, 33.8kB/s]
     14%|#4        | 5.81M/41.5M [01:31<12:58, 48.1kB/s]
     14%|#4        | 5.82M/41.5M [01:31<12:55, 48.2kB/s]
     14%|#4        | 5.83M/41.5M [01:31<12:53, 48.3kB/s]
     14%|#4        | 5.84M/41.5M [01:31<16:19, 38.2kB/s]
     14%|#4        | 5.85M/41.5M [01:31<11:59, 51.9kB/s]
     14%|#4        | 5.86M/41.5M [01:32<12:56, 48.1kB/s]
     14%|#4        | 5.87M/41.5M [01:32<12:54, 48.2kB/s]
     14%|#4        | 5.88M/41.5M [01:32<12:01, 51.8kB/s]
     14%|#4        | 5.88M/41.5M [01:32<12:13, 50.9kB/s]
     14%|#4        | 5.89M/41.5M [01:32<15:59, 38.9kB/s]
     14%|#4        | 5.91M/41.5M [01:33<12:29, 49.8kB/s]
     14%|#4        | 5.93M/41.5M [01:33<10:24, 59.7kB/s]
     14%|#4        | 5.94M/41.5M [01:33<13:22, 46.5kB/s]
     14%|#4        | 5.95M/41.5M [01:34<11:26, 54.3kB/s]
     14%|#4        | 5.96M/41.5M [01:34<11:02, 56.2kB/s]
     14%|#4        | 5.97M/41.5M [01:34<11:25, 54.3kB/s]
     14%|#4        | 5.98M/41.5M [01
 :34<11:45, 52.8kB/s]
     14%|#4        | 5.98M/41.5M [01:34<12:00, 51.7kB/s]
     14%|#4        | 6.00M/41.5M [01:34<10:17, 60.2kB/s]
     14%|#4        | 6.01M/41.5M [01:35<10:51, 57.1kB/s]
     14%|#4        | 6.02M/41.5M [01:35<10:31, 58.9kB/s]
     15%|#4        | 6.03M/41.5M [01:35<09:27, 65.5kB/s]
     15%|#4        | 6.04M/41.5M [01:35<09:28, 65.4kB/s]
     15%|#4        | 6.05M/41.5M [01:35<08:52, 69.8kB/s]
     15%|#4        | 6.06M/41.5M [01:35<09:01, 68.7kB/s]
     15%|#4        | 6.08M/41.5M [01:36<11:03, 56.0kB/s]
     15%|#4        | 6.10M/41.5M [01:36<07:58, 77.5kB/s]
     15%|#4        | 6.12M/41.5M [01:36<11:18, 54.7kB/s]
     15%|#4        | 6.14M/41.5M [01:37<08:31, 72.5kB/s]
     15%|#4        | 6.16M/41.5M [01:37<11:22, 54.3kB/s]
     15%|#4        | 6.17M/41.5M [01:37<11:46, 52.4kB/s]
     15%|#4        | 6.18M/41.5M [01:38<11:53, 51.9kB/s]
     15%|#4        | 6.19M/41.5M [01:38<14:29, 42.6kB/s]
     15%|#4        | 6.20M/41.5M [01:38<16:49, 36.7kB/s]
     15
 %|#4        | 6.20M/41.5M [01:38<15:50, 38.9kB/s]
     15%|#4        | 6.21M/41.5M [01:39<18:13, 33.8kB/s]
     15%|#4        | 6.22M/41.5M [01:39<23:27, 26.3kB/s]
     15%|#5        | 6.23M/41.5M [01:40<38:31, 16.0kB/s]
     15%|#5        | 6.23M/41.5M [01:41<38:22, 16.1kB/s]
     15%|#5        | 6.24M/41.5M [01:42<42:34, 14.5kB/s]
     15%|#5        | 6.25M/41.5M [01:42<37:33, 16.4kB/s]
     15%|#5        | 6.26M/41.5M [01:43<40:39, 15.1kB/s]
     15%|#5        | 6.27M/41.5M [01:43<32:22, 19.0kB/s]
     15%|#5        | 6.27M/41.5M [01:43<30:16, 20.3kB/s]
     15%|#5        | 6.28M/41.5M [01:43<25:01, 24.6kB/s]
     15%|#5        | 6.29M/41.5M [01:44<25:05, 24.5kB/s]
     15%|#5        | 6.30M/41.5M [01:44<21:21, 28.8kB/s]
     15%|#5        | 6.30M/41.5M [01:44<18:45, 32.8kB/s]
     15%|#5        | 6.31M/41.5M [01:44<16:54, 36.3kB/s]
     15%|#5        | 6.32M/41.5M [01:44<15:37, 39.3kB/s]
     15%|#5        | 6.34M/41.5M [01:44<11:20, 54.1kB/s]
     15%|#5        | 6.34M/41.5M [0
 1:45<14:46, 41.6kB/s]
     15%|#5        | 6.36M/41.5M [01:45<11:55, 51.5kB/s]
     15%|#5        | 6.37M/41.5M [01:45<13:06, 46.8kB/s]
     15%|#5        | 6.38M/41.5M [01:45<12:58, 47.3kB/s]
     15%|#5        | 6.39M/41.5M [01:46<11:14, 54.6kB/s]
     15%|#5        | 6.40M/41.5M [01:46<10:25, 58.8kB/s]
     15%|#5        | 6.41M/41.5M [01:46<14:02, 43.6kB/s]
     15%|#5        | 6.42M/41.5M [01:46<11:52, 51.6kB/s]
     15%|#5        | 6.43M/41.5M [01:46<12:01, 50.9kB/s]
     16%|#5        | 6.45M/41.5M [01:47<09:46, 62.7kB/s]
     16%|#5        | 6.45M/41.5M [01:47<10:23, 59.0kB/s]
     16%|#5        | 6.46M/41.5M [01:47<13:54, 44.0kB/s]
     16%|#5        | 6.48M/41.5M [01:47<10:49, 56.5kB/s]
     16%|#5        | 6.48M/41.5M [01:47<11:12, 54.6kB/s]
     16%|#5        | 6.49M/41.5M [01:48<11:32, 53.0kB/s]
     16%|#5        | 6.51M/41.5M [01:48<09:22, 65.2kB/s]
     16%|#5        | 6.52M/41.5M [01:48<10:06, 60.5kB/s]
     16%|#5        | 6.52M/41.5M [01:48<10:41, 57.1kB/s]
     1
 6%|#5        | 6.54M/41.5M [01:48<09:32, 64.0kB/s]
     16%|#5        | 6.55M/41.5M [01:49<13:03, 46.7kB/s]
     16%|#5        | 6.56M/41.5M [01:49<13:29, 45.2kB/s]
     16%|#5        | 6.58M/41.5M [01:49<14:16, 42.7kB/s]
     16%|#5        | 6.59M/41.5M [01:50<21:34, 28.3kB/s]
     16%|#5        | 6.59M/41.5M [01:50<22:21, 27.3kB/s]
     16%|#5        | 6.60M/41.5M [01:51<23:00, 26.5kB/s]
     16%|#5        | 6.61M/41.5M [01:51<20:17, 30.0kB/s]
     16%|#5        | 6.62M/41.5M [01:51<21:34, 28.2kB/s]
     16%|#5        | 6.62M/41.5M [01:51<19:03, 32.0kB/s]
     16%|#5        | 6.63M/41.5M [01:52<17:11, 35.4kB/s]
     16%|#6        | 6.64M/41.5M [01:52<15:50, 38.5kB/s]
     16%|#6        | 6.65M/41.5M [01:52<14:51, 41.0kB/s]
     16%|#6        | 6.66M/41.5M [01:52<14:09, 43.0kB/s]
     16%|#6        | 6.67M/41.5M [01:52<10:33, 57.7kB/s]
     16%|#6        | 6.68M/41.5M [01:52<11:01, 55.2kB/s]
     16%|#6        | 6.69M/41.5M [01:53<11:24, 53.3kB/s]
     16%|#6        | 6.70M/41.5M [
 01:53<09:12, 66.0kB/s]
     16%|#6        | 6.72M/41.5M [01:53<08:05, 75.1kB/s]
     16%|#6        | 6.73M/41.5M [01:53<09:01, 67.4kB/s]
     16%|#6        | 6.75M/41.5M [01:53<06:41, 90.6kB/s]
     16%|#6        | 6.77M/41.5M [01:53<06:33, 92.6kB/s]
     16%|#6        | 6.79M/41.5M [01:54<05:35, 109kB/s] 
     16%|#6        | 6.81M/41.5M [01:54<05:03, 120kB/s]
     16%|#6        | 6.84M/41.5M [01:54<04:44, 128kB/s]
     17%|#6        | 6.87M/41.5M [01:54<05:19, 114kB/s]
     17%|#6        | 6.92M/41.5M [01:54<03:33, 170kB/s]
     17%|#6        | 6.95M/41.5M [01:55<03:25, 176kB/s]
     17%|#6        | 6.98M/41.5M [01:55<03:35, 168kB/s]
     17%|#6        | 7.00M/41.5M [01:55<03:43, 162kB/s]
     17%|#6        | 7.02M/41.5M [01:55<03:49, 157kB/s]
     17%|#6        | 7.04M/41.5M [01:55<04:19, 139kB/s]
     17%|#7        | 7.07M/41.5M [01:55<03:51, 156kB/s]
     17%|#7        | 7.09M/41.5M [01:56<04:20, 139kB/s]
     17%|#7        | 7.10M/41.5M [01:56<06:09, 97.5kB/s]
     17%|#7     
    | 7.12M/41.5M [01:56<05:29, 109kB/s] 
     17%|#7        | 7.14M/41.5M [01:56<05:38, 106kB/s]
     17%|#7        | 7.16M/41.5M [01:56<05:46, 104kB/s]
     17%|#7        | 7.17M/41.5M [01:57<05:52, 102kB/s]
     17%|#7        | 7.19M/41.5M [01:57<07:39, 78.4kB/s]
     17%|#7        | 7.20M/41.5M [01:57<07:13, 82.9kB/s]
     17%|#7        | 7.22M/41.5M [01:57<06:54, 86.6kB/s]
     17%|#7        | 7.23M/41.5M [01:58<08:28, 70.6kB/s]
     17%|#7        | 7.26M/41.5M [01:58<07:12, 82.9kB/s]
     18%|#7        | 7.27M/41.5M [01:58<08:08, 73.4kB/s]
     18%|#7        | 7.29M/41.5M [01:58<08:01, 74.5kB/s]
     18%|#7        | 7.30M/41.5M [01:59<08:12, 72.8kB/s]
     18%|#7        | 7.31M/41.5M [01:59<07:32, 79.2kB/s]
     18%|#7        | 7.32M/41.5M [01:59<08:25, 70.9kB/s]
     18%|#7        | 7.34M/41.5M [01:59<07:37, 78.3kB/s]
     18%|#7        | 7.35M/41.5M [01:59<07:07, 83.7kB/s]
     18%|#7        | 7.37M/41.5M [01:59<07:49, 76.2kB/s]
     18%|#7        | 7.38M/41.5M [02:00<06:47, 
 87.8kB/s]
     18%|#7        | 7.40M/41.5M [02:00<10:23, 57.4kB/s]
     18%|#7        | 7.42M/41.5M [02:00<07:51, 75.7kB/s]
     18%|#7        | 7.44M/41.5M [02:01<09:55, 60.0kB/s]
     18%|#7        | 7.45M/41.5M [02:01<08:50, 67.2kB/s]
     18%|#8        | 7.47M/41.5M [02:01<09:49, 60.6kB/s]
     18%|#8        | 7.48M/41.5M [02:01<10:13, 58.1kB/s]
     18%|#8        | 7.48M/41.5M [02:02<10:36, 56.0kB/s]
     18%|#8        | 7.50M/41.5M [02:02<11:11, 53.1kB/s]
     18%|#8        | 7.52M/41.5M [02:02<09:57, 59.6kB/s]
     18%|#8        | 7.52M/41.5M [02:02<10:23, 57.1kB/s]
     18%|#8        | 7.53M/41.5M [02:02<10:47, 55.0kB/s]
     18%|#8        | 7.54M/41.5M [02:03<10:25, 56.9kB/s]
     18%|#8        | 7.55M/41.5M [02:03<10:52, 54.6kB/s]
     18%|#8        | 7.56M/41.5M [02:03<08:51, 66.9kB/s]
     18%|#8        | 7.57M/41.5M [02:03<09:37, 61.6kB/s]
     18%|#8        | 7.58M/41.5M [02:03<10:15, 57.8kB/s]
     18%|#8        | 7.59M/41.5M [02:04<11:40, 50.8kB/s]
     18%|#8       
  | 7.61M/41.5M [02:04<09:03, 65.4kB/s]
     18%|#8        | 7.62M/41.5M [02:04<12:17, 48.2kB/s]
     18%|#8        | 7.63M/41.5M [02:04<09:56, 59.5kB/s]
     18%|#8        | 7.64M/41.5M [02:04<10:24, 56.8kB/s]
     18%|#8        | 7.65M/41.5M [02:05<10:48, 54.7kB/s]
     18%|#8        | 7.66M/41.5M [02:05<11:08, 53.1kB/s]
     18%|#8        | 7.66M/41.5M [02:05<11:24, 51.8kB/s]
     18%|#8        | 7.67M/41.5M [02:05<11:36, 50.9kB/s]
     19%|#8        | 7.68M/41.5M [02:05<14:21, 41.1kB/s]
     19%|#8        | 7.70M/41.5M [02:06<10:40, 55.4kB/s]
     19%|#8        | 7.70M/41.5M [02:06<11:01, 53.6kB/s]
     19%|#8        | 7.71M/41.5M [02:06<11:18, 52.2kB/s]
     19%|#8        | 7.72M/41.5M [02:06<10:16, 57.4kB/s]
     19%|#8        | 7.73M/41.5M [02:06<10:47, 54.7kB/s]
     19%|#8        | 7.73M/41.5M [02:06<11:11, 52.7kB/s]
     19%|#8        | 7.74M/41.5M [02:06<11:27, 51.5kB/s]
     19%|#8        | 7.76M/41.5M [02:07<12:47, 46.1kB/s]
     19%|#8        | 7.77M/41.5M [02:07<09:15,
  63.6kB/s]
     19%|#8        | 7.78M/41.5M [02:07<09:52, 59.6kB/s]
     19%|#8        | 7.79M/41.5M [02:07<10:24, 56.6kB/s]
     19%|#8        | 7.80M/41.5M [02:07<08:38, 68.1kB/s]
     19%|#8        | 7.81M/41.5M [02:08<09:25, 62.5kB/s]
     19%|#8        | 7.82M/41.5M [02:08<10:04, 58.4kB/s]
     19%|#8        | 7.84M/41.5M [02:08<08:25, 69.8kB/s]
     19%|#8        | 7.84M/41.5M [02:09<18:40, 31.5kB/s]
     19%|#8        | 7.88M/41.5M [02:09<09:29, 61.9kB/s]
     19%|#9        | 7.90M/41.5M [02:09<08:57, 65.5kB/s]
     19%|#9        | 7.91M/41.5M [02:10<11:02, 53.1kB/s]
     19%|#9        | 7.93M/41.5M [02:10<09:38, 60.9kB/s]
     19%|#9        | 7.94M/41.5M [02:10<10:02, 58.4kB/s]
     19%|#9        | 7.95M/41.5M [02:10<09:12, 63.7kB/s]
     19%|#9        | 7.96M/41.5M [02:11<11:26, 51.2kB/s]
     19%|#9        | 7.98M/41.5M [02:11<08:34, 68.3kB/s]
     19%|#9        | 7.99M/41.5M [02:11<09:10, 63.8kB/s]
     19%|#9        | 8.00M/41.5M [02:11<09:44, 60.1kB/s]
     19%|#9      
   | 8.02M/41.5M [02:11<08:23, 69.7kB/s]
     19%|#9        | 8.02M/41.5M [02:11<09:07, 64.1kB/s]
     19%|#9        | 8.03M/41.5M [02:12<09:06, 64.1kB/s]
     19%|#9        | 8.05M/41.5M [02:12<18:32, 31.5kB/s]
     20%|#9        | 8.09M/41.5M [02:13<09:31, 61.3kB/s]
     20%|#9        | 8.11M/41.5M [02:13<08:42, 67.0kB/s]
     20%|#9        | 8.12M/41.5M [02:13<09:30, 61.3kB/s]
     20%|#9        | 8.13M/41.5M [02:13<09:52, 59.0kB/s]
     20%|#9        | 8.14M/41.5M [02:14<10:14, 56.9kB/s]
     20%|#9        | 8.15M/41.5M [02:14<10:35, 55.0kB/s]
     20%|#9        | 8.16M/41.5M [02:14<10:01, 58.1kB/s]
     20%|#9        | 8.17M/41.5M [02:14<09:12, 63.2kB/s]
     20%|#9        | 8.18M/41.5M [02:14<09:48, 59.4kB/s]
     20%|#9        | 8.19M/41.5M [02:14<10:06, 57.6kB/s]
     20%|#9        | 8.20M/41.5M [02:15<08:36, 67.6kB/s]
     20%|#9        | 8.21M/41.5M [02:15<09:08, 63.6kB/s]
     20%|#9        | 8.23M/41.5M [02:15<07:54, 73.5kB/s]
     20%|#9        | 8.24M/41.5M [02:15<07:12
 , 80.6kB/s]
     20%|#9        | 8.26M/41.5M [02:15<06:47, 85.6kB/s]
     20%|#9        | 8.27M/41.5M [02:15<06:30, 89.1kB/s]
     20%|#9        | 8.29M/41.5M [02:16<06:20, 91.5kB/s]
     20%|##        | 8.31M/41.5M [02:16<05:30, 105kB/s] 
     20%|##        | 8.33M/41.5M [02:16<05:29, 105kB/s]
     20%|##        | 8.35M/41.5M [02:16<04:55, 118kB/s]
     20%|##        | 8.38M/41.5M [02:16<04:35, 126kB/s]
     20%|##        | 8.40M/41.5M [02:16<04:22, 132kB/s]
     20%|##        | 8.43M/41.5M [02:17<03:49, 151kB/s]
     20%|##        | 8.45M/41.5M [02:17<04:43, 122kB/s]
     20%|##        | 8.49M/41.5M [02:17<03:04, 187kB/s]
     21%|##        | 8.52M/41.5M [02:17<03:38, 159kB/s]
     21%|##        | 8.54M/41.5M [02:17<03:27, 167kB/s]
     21%|##        | 8.56M/41.5M [02:18<04:40, 123kB/s]
     21%|##        | 8.59M/41.5M [02:18<04:23, 131kB/s]
     21%|##        | 8.60M/41.5M [02:18<04:42, 122kB/s]
     21%|##        | 8.62M/41.5M [02:18<05:26, 106kB/s]
     21%|##        | 8.63M/41
 .5M [02:18<05:10, 111kB/s]
     21%|##        | 8.65M/41.5M [02:19<05:21, 107kB/s]
     21%|##        | 8.66M/41.5M [02:19<07:06, 80.6kB/s]
     21%|##        | 8.69M/41.5M [02:19<05:56, 96.6kB/s]
     21%|##        | 8.70M/41.5M [02:19<06:19, 90.5kB/s]
     21%|##1       | 8.72M/41.5M [02:20<07:16, 78.7kB/s]
     21%|##1       | 8.73M/41.5M [02:20<09:01, 63.4kB/s]
     21%|##1       | 8.76M/41.5M [02:20<06:45, 84.5kB/s]
     21%|##1       | 8.77M/41.5M [02:20<06:56, 82.3kB/s]
     21%|##1       | 8.79M/41.5M [02:20<06:38, 86.0kB/s]
     21%|##1       | 8.80M/41.5M [02:21<06:25, 88.9kB/s]
     21%|##1       | 8.82M/41.5M [02:21<09:37, 59.4kB/s]
     21%|##1       | 8.84M/41.5M [02:21<07:03, 80.9kB/s]
     21%|##1       | 8.86M/41.5M [02:21<07:09, 79.7kB/s]
     21%|##1       | 8.88M/41.5M [02:22<10:00, 56.9kB/s]
     21%|##1       | 8.89M/41.5M [02:22<08:50, 64.4kB/s]
     21%|##1       | 8.91M/41.5M [02:22<07:58, 71.4kB/s]
     22%|##1       | 8.92M/41.5M [02:22<07:21, 77.3kB/s]
  
    22%|##1       | 8.94M/41.5M [02:23<08:37, 66.0kB/s]
     22%|##1       | 8.95M/41.5M [02:23<07:48, 72.9kB/s]
     22%|##1       | 8.97M/41.5M [02:23<07:13, 78.7kB/s]
     22%|##1       | 8.98M/41.5M [02:23<08:32, 66.4kB/s]
     22%|##1       | 9.00M/41.5M [02:24<07:44, 73.4kB/s]
     22%|##1       | 9.02M/41.5M [02:24<07:09, 79.2kB/s]
     22%|##1       | 9.03M/41.5M [02:24<06:45, 83.9kB/s]
     22%|##1       | 9.05M/41.5M [02:24<08:13, 69.0kB/s]
     22%|##1       | 9.05M/41.5M [02:24<08:50, 64.1kB/s]
     22%|##1       | 9.07M/41.5M [02:25<09:46, 57.9kB/s]
     22%|##1       | 9.09M/41.5M [02:25<07:20, 77.2kB/s]
     22%|##1       | 9.11M/41.5M [02:25<10:12, 55.4kB/s]
     22%|##1       | 9.12M/41.5M [02:26<09:22, 60.4kB/s]
     22%|##2       | 9.13M/41.5M [02:26<09:45, 58.0kB/s]
     22%|##2       | 9.14M/41.5M [02:26<11:18, 50.0kB/s]
     22%|##2       | 9.16M/41.5M [02:26<11:51, 47.7kB/s]
     22%|##2       | 9.17M/41.5M [02:27<09:47, 57.7kB/s]
     22%|##2       | 9.18M/41.
 5M [02:27<10:09, 55.6kB/s]
     22%|##2       | 9.19M/41.5M [02:27<10:27, 53.9kB/s]
     22%|##2       | 9.20M/41.5M [02:27<10:43, 52.6kB/s]
     22%|##2       | 9.21M/41.5M [02:27<08:44, 64.6kB/s]
     22%|##2       | 9.22M/41.5M [02:28<12:37, 44.7kB/s]
     22%|##2       | 9.23M/41.5M [02:28<11:40, 48.3kB/s]
     22%|##2       | 9.24M/41.5M [02:28<09:50, 57.2kB/s]
     22%|##2       | 9.25M/41.5M [02:28<10:14, 55.0kB/s]
     22%|##2       | 9.26M/41.5M [02:28<10:33, 53.3kB/s]
     22%|##2       | 9.27M/41.5M [02:29<10:49, 52.0kB/s]
     22%|##2       | 9.28M/41.5M [02:29<08:40, 64.9kB/s]
     22%|##2       | 9.29M/41.5M [02:29<09:20, 60.2kB/s]
     22%|##2       | 9.30M/41.5M [02:29<09:54, 56.8kB/s]
     22%|##2       | 9.30M/41.5M [02:29<13:22, 42.1kB/s]
     22%|##2       | 9.32M/41.5M [02:30<10:44, 52.3kB/s]
     22%|##2       | 9.33M/41.5M [02:30<10:14, 54.9kB/s]
     23%|##2       | 9.34M/41.5M [02:30<10:34, 53.1kB/s]
     23%|##2       | 9.34M/41.5M [02:30<13:52, 40.5kB/s]
 
     23%|##2       | 9.36M/41.5M [02:30<10:24, 54.0kB/s]
     23%|##2       | 9.37M/41.5M [02:31<10:39, 52.6kB/s]
     23%|##2       | 9.38M/41.5M [02:31<10:52, 51.6kB/s]
     23%|##2       | 9.39M/41.5M [02:31<08:44, 64.2kB/s]
     23%|##2       | 9.40M/41.5M [02:31<14:40, 38.2kB/s]
     23%|##2       | 9.41M/41.5M [02:32<16:45, 33.5kB/s]
     23%|##2       | 9.42M/41.5M [02:32<12:11, 46.0kB/s]
     23%|##2       | 9.44M/41.5M [02:32<09:48, 57.1kB/s]
     23%|##2       | 9.45M/41.5M [02:32<10:09, 55.1kB/s]
     23%|##2       | 9.45M/41.5M [02:32<10:28, 53.5kB/s]
     23%|##2       | 9.46M/41.5M [02:33<10:43, 52.2kB/s]
     23%|##2       | 9.47M/41.5M [02:33<10:55, 51.2kB/s]
     23%|##2       | 9.48M/41.5M [02:33<08:42, 64.2kB/s]
     23%|##2       | 9.49M/41.5M [02:33<12:02, 46.4kB/s]
     23%|##2       | 9.51M/41.5M [02:33<09:31, 58.7kB/s]
     23%|##2       | 9.52M/41.5M [02:34<09:57, 56.1kB/s]
     23%|##2       | 9.52M/41.5M [02:34<10:19, 54.1kB/s]
     23%|##2       | 9.54M/41
 .5M [02:34<08:26, 66.1kB/s]
     23%|##3       | 9.55M/41.5M [02:34<09:07, 61.1kB/s]
     23%|##3       | 9.55M/41.5M [02:34<09:41, 57.6kB/s]
     23%|##3       | 9.57M/41.5M [02:34<08:04, 69.1kB/s]
     23%|##3       | 9.58M/41.5M [02:35<08:50, 63.1kB/s]
     23%|##3       | 9.59M/41.5M [02:35<07:36, 73.2kB/s]
     23%|##3       | 9.61M/41.5M [02:35<06:55, 80.4kB/s]
     23%|##3       | 9.62M/41.5M [02:35<07:51, 70.9kB/s]
     23%|##3       | 9.63M/41.5M [02:35<07:03, 78.8kB/s]
     23%|##3       | 9.66M/41.5M [02:36<07:18, 76.1kB/s]
     23%|##3       | 9.68M/41.5M [02:36<05:57, 93.4kB/s]
     23%|##3       | 9.70M/41.5M [02:36<05:53, 94.4kB/s]
     23%|##3       | 9.71M/41.5M [02:36<05:50, 95.2kB/s]
     23%|##3       | 9.73M/41.5M [02:36<05:47, 95.7kB/s]
     23%|##3       | 9.74M/41.5M [02:36<05:46, 96.2kB/s]
     24%|##3       | 9.76M/41.5M [02:37<05:44, 96.5kB/s]
     24%|##3       | 9.77M/41.5M [02:37<07:24, 74.9kB/s]
     24%|##3       | 9.79M/41.5M [02:37<06:53, 80.4kB/s]
      24%|##3       | 9.80M/41.5M [02:37<06:32, 84.6kB/s]
     24%|##3       | 9.82M/41.5M [02:37<06:17, 88.0kB/s]
     24%|##3       | 9.84M/41.5M [02:38<07:47, 70.9kB/s]
     24%|##3       | 9.85M/41.5M [02:38<08:51, 62.4kB/s]
     24%|##3       | 9.87M/41.5M [02:38<07:54, 69.9kB/s]
     24%|##3       | 9.88M/41.5M [02:38<08:30, 64.9kB/s]
     24%|##3       | 9.89M/41.5M [02:39<07:34, 72.9kB/s]
     24%|##3       | 9.90M/41.5M [02:39<08:18, 66.5kB/s]
     24%|##3       | 9.91M/41.5M [02:39<13:18, 41.5kB/s]
     24%|##3       | 9.92M/41.5M [02:40<10:53, 50.6kB/s]
     24%|##3       | 9.93M/41.5M [02:40<10:59, 50.2kB/s]
     24%|##3       | 9.94M/41.5M [02:40<11:03, 49.8kB/s]
     24%|##3       | 9.95M/41.5M [02:40<13:58, 39.5kB/s]
     24%|##4       | 9.96M/41.5M [02:41<16:02, 34.4kB/s]
     24%|##4       | 9.97M/41.5M [02:41<14:57, 36.8kB/s]
     24%|##4       | 9.98M/41.5M [02:41<14:03, 39.2kB/s]
     24%|##4       | 9.98M/41.5M [02:41<16:17, 33.8kB/s]
     24%|##4       | 9.99M/4
 1.5M [02:42<18:00, 30.6kB/s]
     24%|##4       | 10.0M/41.5M [02:42<19:17, 28.5kB/s]
     24%|##4       | 10.0M/41.5M [02:42<20:14, 27.2kB/s]
     24%|##4       | 10.0M/41.5M [02:43<17:40, 31.1kB/s]
     24%|##4       | 10.0M/41.5M [02:43<19:06, 28.8kB/s]
     24%|##4       | 10.0M/41.5M [02:43<16:48, 32.7kB/s]
     24%|##4       | 10.0M/41.5M [02:43<18:30, 29.7kB/s]
     24%|##4       | 10.0M/41.5M [02:44<16:21, 33.6kB/s]
     24%|##4       | 10.1M/41.5M [02:44<21:35, 25.5kB/s]
     24%|##4       | 10.1M/41.5M [02:44<14:14, 38.5kB/s]
     24%|##4       | 10.1M/41.5M [02:44<13:30, 40.6kB/s]
     24%|##4       | 10.1M/41.5M [02:45<12:55, 42.5kB/s]
     24%|##4       | 10.1M/41.5M [02:45<12:28, 44.0kB/s]
     24%|##4       | 10.1M/41.5M [02:45<12:08, 45.2kB/s]
     24%|##4       | 10.1M/41.5M [02:45<11:53, 46.1kB/s]
     24%|##4       | 10.1M/41.5M [02:45<09:41, 56.6kB/s]
     24%|##4       | 10.1M/41.5M [02:45<10:04, 54.4kB/s]
     24%|##4       | 10.1M/41.5M [02:46<10:22, 52.8kB/s]
 
     24%|##4       | 10.1M/41.5M [02:46<10:36, 51.6kB/s]
     24%|##4       | 10.2M/41.5M [02:46<08:26, 64.8kB/s]
     25%|##4       | 10.2M/41.5M [02:46<09:31, 57.4kB/s]
     25%|##4       | 10.2M/41.5M [02:47<08:07, 67.3kB/s]
     25%|##4       | 10.2M/41.5M [02:47<07:17, 74.9kB/s]
     25%|##4       | 10.2M/41.5M [02:47<08:03, 67.9kB/s]
     25%|##4       | 10.2M/41.5M [02:47<07:11, 76.0kB/s]
     25%|##4       | 10.2M/41.5M [02:47<07:59, 68.4kB/s]
     25%|##4       | 10.3M/41.5M [02:47<07:07, 76.7kB/s]
     25%|##4       | 10.3M/41.5M [02:48<06:35, 82.7kB/s]
     25%|##4       | 10.3M/41.5M [02:48<06:16, 86.9kB/s]
     25%|##4       | 10.3M/41.5M [02:48<06:03, 90.0kB/s]
     25%|##4       | 10.3M/41.5M [02:48<05:54, 92.2kB/s]
     25%|##4       | 10.3M/41.5M [02:48<06:14, 87.2kB/s]
     25%|##4       | 10.4M/41.5M [02:48<06:02, 90.1kB/s]
     25%|##4       | 10.4M/41.5M [02:49<07:09, 75.9kB/s]
     25%|##5       | 10.4M/41.5M [02:49<06:40, 81.4kB/s]
     25%|##5       | 10.4M/
 41.5M [02:49<06:20, 85.6kB/s]
     25%|##5       | 10.4M/41.5M [02:49<06:06, 88.8kB/s]
     25%|##5       | 10.4M/41.5M [02:49<05:11, 105kB/s] 
     25%|##5       | 10.5M/41.5M [02:50<05:17, 103kB/s]
     25%|##5       | 10.5M/41.5M [02:50<05:21, 101kB/s]
     25%|##5       | 10.5M/41.5M [02:50<05:25, 100kB/s]
     25%|##5       | 10.5M/41.5M [02:50<07:04, 76.6kB/s]
     25%|##5       | 10.5M/41.5M [02:51<07:11, 75.2kB/s]
     25%|##5       | 10.5M/41.5M [02:51<05:58, 90.4kB/s]
     25%|##5       | 10.6M/41.5M [02:51<06:14, 86.7kB/s]
     25%|##5       | 10.6M/41.5M [02:51<08:36, 62.7kB/s]
     26%|##5       | 10.6M/41.5M [02:52<10:44, 50.3kB/s]
     26%|##5       | 10.6M/41.5M [02:52<09:36, 56.2kB/s]
     26%|##5       | 10.6M/41.5M [02:52<09:52, 54.6kB/s]
     26%|##5       | 10.6M/41.5M [02:52<12:20, 43.7kB/s]
     26%|##5       | 10.6M/41.5M [02:53<12:03, 44.7kB/s]
     26%|##5       | 10.6M/41.5M [02:53<11:48, 45.6kB/s]
     26%|##5       | 10.6M/41.5M [02:53<11:37, 46.4kB/s]
 
     26%|##5       | 10.6M/41.5M [02:53<11:28, 47.0kB/s]
     26%|##5       | 10.7M/41.5M [02:53<11:18, 47.7kB/s]
     26%|##5       | 10.7M/41.5M [02:54<14:37, 36.8kB/s]
     26%|##5       | 10.7M/41.5M [02:54<10:57, 49.1kB/s]
     26%|##5       | 10.7M/41.5M [02:54<13:27, 40.0kB/s]
     26%|##5       | 10.7M/41.5M [02:55<12:52, 41.8kB/s]
     26%|##5       | 10.7M/41.5M [02:55<12:24, 43.4kB/s]
     26%|##5       | 10.7M/41.5M [02:55<12:02, 44.7kB/s]
     26%|##5       | 10.7M/41.5M [02:55<11:45, 45.7kB/s]
     26%|##5       | 10.7M/41.5M [02:55<14:41, 36.6kB/s]
     26%|##5       | 10.7M/41.5M [02:56<16:50, 31.9kB/s]
     26%|##5       | 10.8M/41.5M [02:56<11:43, 45.8kB/s]
     26%|##5       | 10.8M/41.5M [02:56<11:33, 46.5kB/s]
     26%|##5       | 10.8M/41.5M [02:56<11:24, 47.0kB/s]
     26%|##6       | 10.8M/41.5M [02:56<08:54, 60.3kB/s]
     26%|##6       | 10.8M/41.5M [02:57<11:57, 44.8kB/s]
     26%|##6       | 10.8M/41.5M [02:57<09:22, 57.1kB/s]
     26%|##6       | 10.8M/41
 .5M [02:57<09:44, 55.0kB/s]
     26%|##6       | 10.8M/41.5M [02:57<10:02, 53.3kB/s]
     26%|##6       | 10.8M/41.5M [02:57<10:17, 52.1kB/s]
     26%|##6       | 10.8M/41.5M [02:58<09:44, 55.0kB/s]
     26%|##6       | 10.9M/41.5M [02:58<13:08, 40.8kB/s]
     26%|##6       | 10.9M/41.5M [02:58<10:23, 51.5kB/s]
     26%|##6       | 10.9M/41.5M [02:58<10:31, 50.8kB/s]
     26%|##6       | 10.9M/41.5M [02:58<10:38, 50.2kB/s]
     26%|##6       | 10.9M/41.5M [02:59<10:44, 49.8kB/s]
     26%|##6       | 10.9M/41.5M [02:59<10:16, 52.1kB/s]
     26%|##6       | 10.9M/41.5M [02:59<08:34, 62.3kB/s]
     26%|##6       | 10.9M/41.5M [02:59<09:09, 58.4kB/s]
     26%|##6       | 10.9M/41.5M [02:59<11:55, 44.8kB/s]
     26%|##6       | 11.0M/41.5M [03:00<08:27, 63.0kB/s]
     26%|##6       | 11.0M/41.5M [03:00<08:58, 59.4kB/s]
     26%|##6       | 11.0M/41.5M [03:00<09:24, 56.7kB/s]
     26%|##6       | 11.0M/41.5M [03:00<07:53, 67.6kB/s]
     26%|##6       | 11.0M/41.5M [03:00<08:33, 62.3kB/s]
      27%|##6       | 11.0M/41.5M [03:00<09:06, 58.5kB/s]
     27%|##6       | 11.0M/41.5M [03:01<07:38, 69.7kB/s]
     27%|##6       | 11.0M/41.5M [03:01<08:22, 63.6kB/s]
     27%|##6       | 11.0M/41.5M [03:01<09:22, 56.8kB/s]
     27%|##6       | 11.1M/41.5M [03:01<07:58, 66.7kB/s]
     27%|##6       | 11.1M/41.5M [03:01<07:07, 74.7kB/s]
     27%|##6       | 11.1M/41.5M [03:02<07:51, 67.6kB/s]
     27%|##6       | 11.1M/41.5M [03:02<08:31, 62.3kB/s]
     27%|##6       | 11.1M/41.5M [03:02<07:20, 72.3kB/s]
     27%|##6       | 11.1M/41.5M [03:02<06:40, 79.5kB/s]
     27%|##6       | 11.1M/41.5M [03:02<07:31, 70.5kB/s]
     27%|##6       | 11.1M/41.5M [03:02<08:16, 64.1kB/s]
     27%|##6       | 11.1M/41.5M [03:03<12:13, 43.4kB/s]
     27%|##6       | 11.2M/41.5M [03:03<07:51, 67.5kB/s]
     27%|##6       | 11.2M/41.5M [03:03<08:26, 62.8kB/s]
     27%|##6       | 11.2M/41.5M [03:03<08:58, 59.0kB/s]
     27%|##6       | 11.2M/41.5M [03:04<07:36, 69.5kB/s]
     27%|##7       | 11.2M/4
 1.5M [03:04<08:18, 63.7kB/s]
     27%|##7       | 11.2M/41.5M [03:04<08:54, 59.4kB/s]
     27%|##7       | 11.2M/41.5M [03:04<07:30, 70.4kB/s]
     27%|##7       | 11.2M/41.5M [03:04<10:42, 49.4kB/s]
     27%|##7       | 11.2M/41.5M [03:05<08:37, 61.2kB/s]
     27%|##7       | 11.3M/41.5M [03:05<11:22, 46.4kB/s]
     27%|##7       | 11.3M/41.5M [03:06<11:08, 47.4kB/s]
     27%|##7       | 11.3M/41.5M [03:06<11:06, 47.5kB/s]
     27%|##7       | 11.3M/41.5M [03:06<09:14, 57.1kB/s]
     27%|##7       | 11.3M/41.5M [03:06<09:32, 55.3kB/s]
     27%|##7       | 11.3M/41.5M [03:06<09:48, 53.7kB/s]
     27%|##7       | 11.3M/41.5M [03:06<10:02, 52.4kB/s]
     27%|##7       | 11.3M/41.5M [03:07<10:14, 51.4kB/s]
     27%|##7       | 11.4M/41.5M [03:07<10:23, 50.7kB/s]
     27%|##7       | 11.4M/41.5M [03:07<10:30, 50.1kB/s]
     27%|##7       | 11.4M/41.5M [03:07<10:35, 49.7kB/s]
     27%|##7       | 11.4M/41.5M [03:07<10:39, 49.4kB/s]
     27%|##7       | 11.4M/41.5M [03:07<10:41, 49.2kB/s]
 
     27%|##7       | 11.4M/41.5M [03:08<10:43, 49.0kB/s]
     27%|##7       | 11.4M/41.5M [03:08<13:56, 37.7kB/s]
     27%|##7       | 11.4M/41.5M [03:08<13:00, 40.4kB/s]
     28%|##7       | 11.4M/41.5M [03:08<12:20, 42.6kB/s]
     28%|##7       | 11.4M/41.5M [03:08<11:53, 44.2kB/s]
     28%|##7       | 11.4M/41.5M [03:09<11:33, 45.5kB/s]
     28%|##7       | 11.4M/41.5M [03:09<17:47, 29.5kB/s]
     28%|##7       | 11.5M/41.5M [03:09<12:05, 43.4kB/s]
     28%|##7       | 11.5M/41.5M [03:09<11:45, 44.6kB/s]
     28%|##7       | 11.5M/41.5M [03:10<11:30, 45.6kB/s]
     28%|##7       | 11.5M/41.5M [03:10<11:18, 46.4kB/s]
     28%|##7       | 11.5M/41.5M [03:10<14:10, 37.0kB/s]
     28%|##7       | 11.5M/41.5M [03:10<13:17, 39.5kB/s]
     28%|##7       | 11.5M/41.5M [03:11<12:40, 41.3kB/s]
     28%|##7       | 11.5M/41.5M [03:11<09:42, 54.0kB/s]
     28%|##7       | 11.5M/41.5M [03:11<09:56, 52.7kB/s]
     28%|##7       | 11.5M/41.5M [03:11<10:07, 51.7kB/s]
     28%|##7       | 11.5M/
 41.5M [03:11<10:17, 50.9kB/s]
     28%|##7       | 11.6M/41.5M [03:11<10:24, 50.2kB/s]
     28%|##7       | 11.6M/41.5M [03:12<10:30, 49.8kB/s]
     28%|##7       | 11.6M/41.5M [03:12<10:34, 49.4kB/s]
     28%|##7       | 11.6M/41.5M [03:12<10:37, 49.2kB/s]
     28%|##7       | 11.6M/41.5M [03:12<10:39, 49.0kB/s]
     28%|##7       | 11.6M/41.5M [03:12<10:40, 48.9kB/s]
     28%|##7       | 11.6M/41.5M [03:13<10:41, 48.9kB/s]
     28%|##8       | 11.6M/41.5M [03:13<08:15, 63.2kB/s]
     28%|##8       | 11.6M/41.5M [03:13<08:51, 58.9kB/s]
     28%|##8       | 11.6M/41.5M [03:13<09:20, 55.8kB/s]
     28%|##8       | 11.6M/41.5M [03:13<07:38, 68.2kB/s]
     28%|##8       | 11.7M/41.5M [03:13<08:21, 62.4kB/s]
     28%|##8       | 11.7M/41.5M [03:14<08:56, 58.3kB/s]
     28%|##8       | 11.7M/41.5M [03:14<09:25, 55.3kB/s]
     28%|##8       | 11.7M/41.5M [03:14<07:40, 67.9kB/s]
     28%|##8       | 11.7M/41.5M [03:14<08:22, 62.2kB/s]
     28%|##8       | 11.7M/41.5M [03:14<09:18, 55.9kB/s
 ]
     28%|##8       | 11.7M/41.5M [03:15<07:51, 66.2kB/s]
     28%|##8       | 11.7M/41.5M [03:15<07:00, 74.2kB/s]
     28%|##8       | 11.8M/41.5M [03:15<07:42, 67.4kB/s]
     28%|##8       | 11.8M/41.5M [03:15<06:52, 75.6kB/s]
     28%|##8       | 11.8M/41.5M [03:15<09:49, 52.9kB/s]
     28%|##8       | 11.8M/41.5M [03:16<06:53, 75.3kB/s]
     28%|##8       | 11.8M/41.5M [03:16<06:25, 80.7kB/s]
     29%|##8       | 11.8M/41.5M [03:16<07:43, 67.1kB/s]
     29%|##8       | 11.8M/41.5M [03:16<10:08, 51.1kB/s]
     29%|##8       | 11.8M/41.5M [03:17<10:14, 50.6kB/s]
     29%|##8       | 11.9M/41.5M [03:17<08:25, 61.4kB/s]
     29%|##8       | 11.9M/41.5M [03:17<08:55, 58.0kB/s]
     29%|##8       | 11.9M/41.5M [03:17<11:42, 44.2kB/s]
     29%|##8       | 11.9M/41.5M [03:17<11:26, 45.2kB/s]
     29%|##8       | 11.9M/41.5M [03:18<11:13, 46.1kB/s]
     29%|##8       | 11.9M/41.5M [03:18<11:03, 46.7kB/s]
     29%|##8       | 11.9M/41.5M [03:18<10:56, 47.3kB/s]
     29%|##8       | 11.9M
 /41.5M [03:18<13:53, 37.2kB/s]
     29%|##8       | 11.9M/41.5M [03:18<10:02, 51.5kB/s]
     29%|##8       | 11.9M/41.5M [03:19<10:10, 50.8kB/s]
     29%|##8       | 11.9M/41.5M [03:19<10:17, 50.2kB/s]
     29%|##8       | 12.0M/41.5M [03:19<10:22, 49.8kB/s]
     29%|##8       | 12.0M/41.5M [03:19<10:26, 49.5kB/s]
     29%|##8       | 12.0M/41.5M [03:19<08:09, 63.2kB/s]
     29%|##8       | 12.0M/41.5M [03:19<08:44, 59.0kB/s]
     29%|##8       | 12.0M/41.5M [03:20<09:12, 55.9kB/s]
     29%|##8       | 12.0M/41.5M [03:20<07:33, 68.2kB/s]
     29%|##8       | 12.0M/41.5M [03:20<08:15, 62.4kB/s]
     29%|##8       | 12.0M/41.5M [03:20<07:04, 72.8kB/s]
     29%|##9       | 12.0M/41.5M [03:20<07:51, 65.5kB/s]
     29%|##9       | 12.1M/41.5M [03:21<09:25, 54.6kB/s]
     29%|##9       | 12.1M/41.5M [03:21<07:28, 68.8kB/s]
     29%|##9       | 12.1M/41.5M [03:21<10:14, 50.2kB/s]
     29%|##9       | 12.1M/41.5M [03:21<10:18, 49.8kB/s]
     29%|##9       | 12.1M/41.5M [03:22<12:56, 39.7kB/
 s]
     29%|##9       | 12.1M/41.5M [03:22<12:19, 41.7kB/s]
     29%|##9       | 12.1M/41.5M [03:22<17:31, 29.3kB/s]
     29%|##9       | 12.1M/41.5M [03:23<14:29, 35.4kB/s]
     29%|##9       | 12.1M/41.5M [03:23<16:02, 32.0kB/s]
     29%|##9       | 12.1M/41.5M [03:23<17:18, 29.6kB/s]
     29%|##9       | 12.1M/41.5M [03:24<18:18, 28.0kB/s]
     29%|##9       | 12.2M/41.5M [03:24<16:11, 31.7kB/s]
     29%|##9       | 12.2M/41.5M [03:24<23:30, 21.8kB/s]
     29%|##9       | 12.2M/41.5M [03:25<20:03, 25.5kB/s]
     29%|##9       | 12.2M/41.5M [03:25<20:17, 25.2kB/s]
     29%|##9       | 12.2M/41.5M [03:26<20:28, 25.0kB/s]
     29%|##9       | 12.2M/41.5M [03:26<23:27, 21.8kB/s]
     29%|##9       | 12.2M/41.5M [03:26<15:32, 32.9kB/s]
     29%|##9       | 12.2M/41.5M [03:27<16:50, 30.4kB/s]
     29%|##9       | 12.2M/41.5M [03:27<15:14, 33.5kB/s]
     30%|##9       | 12.2M/41.5M [03:27<16:45, 30.5kB/s]
     30%|##9       | 12.2M/41.5M [03:27<15:02, 34.0kB/s]
     30%|##9       | 12.3
 M/41.5M [03:27<13:45, 37.1kB/s]
     30%|##9       | 12.3M/41.5M [03:28<12:48, 39.8kB/s]
     30%|##9       | 12.3M/41.5M [03:28<12:08, 42.1kB/s]
     30%|##9       | 12.3M/41.5M [03:28<09:00, 56.6kB/s]
     30%|##9       | 12.3M/41.5M [03:28<09:22, 54.5kB/s]
     30%|##9       | 12.3M/41.5M [03:28<09:39, 52.8kB/s]
     30%|##9       | 12.3M/41.5M [03:29<07:46, 65.5kB/s]
     30%|##9       | 12.3M/41.5M [03:29<07:19, 69.6kB/s]
     30%|##9       | 12.3M/41.5M [03:29<07:24, 68.7kB/s]
     30%|##9       | 12.4M/41.5M [03:29<07:04, 71.9kB/s]
     30%|##9       | 12.4M/41.5M [03:29<06:25, 79.2kB/s]
     30%|##9       | 12.4M/41.5M [03:29<06:01, 84.4kB/s]
     30%|##9       | 12.4M/41.5M [03:30<05:46, 88.1kB/s]
     30%|##9       | 12.4M/41.5M [03:30<05:35, 90.8kB/s]
     30%|##9       | 12.4M/41.5M [03:30<05:04, 100kB/s] 
     30%|###       | 12.5M/41.5M [03:30<05:06, 99.3kB/s]
     30%|###       | 12.5M/41.5M [03:30<06:44, 75.3kB/s]
     30%|###       | 12.5M/41.5M [03:31<05:06, 99.2kB
 /s]
     30%|###       | 12.5M/41.5M [03:31<05:07, 98.7kB/s]
     30%|###       | 12.5M/41.5M [03:31<04:48, 105kB/s] 
     30%|###       | 12.5M/41.5M [03:31<04:54, 103kB/s]
     30%|###       | 12.6M/41.5M [03:31<04:59, 101kB/s]
     30%|###       | 12.6M/41.5M [03:31<04:25, 114kB/s]
     30%|###       | 12.6M/41.5M [03:32<04:37, 109kB/s]
     30%|###       | 12.6M/41.5M [03:32<04:11, 120kB/s]
     30%|###       | 12.6M/41.5M [03:32<04:26, 114kB/s]
     31%|###       | 12.7M/41.5M [03:32<06:01, 83.5kB/s]
     31%|###       | 12.7M/41.5M [03:32<04:32, 111kB/s] 
     31%|###       | 12.7M/41.5M [03:33<05:54, 85.2kB/s]
     31%|###       | 12.7M/41.5M [03:33<06:30, 77.2kB/s]
     31%|###       | 12.8M/41.5M [03:33<05:30, 91.1kB/s]
     31%|###       | 12.8M/41.5M [03:34<06:38, 75.6kB/s]
     31%|###       | 12.8M/41.5M [03:34<07:53, 63.6kB/s]
     31%|###       | 12.8M/41.5M [03:34<08:15, 60.7kB/s]
     31%|###       | 12.8M/41.5M [03:34<07:18, 68.6kB/s]
     31%|###       | 12.8M/41.
 5M [03:34<07:50, 63.9kB/s]
     31%|###       | 12.8M/41.5M [03:35<10:25, 48.0kB/s]
     31%|###       | 12.8M/41.5M [03:35<08:29, 59.0kB/s]
     31%|###       | 12.8M/41.5M [03:35<08:51, 56.5kB/s]
     31%|###       | 12.9M/41.5M [03:36<12:47, 39.1kB/s]
     31%|###1      | 12.9M/41.5M [03:36<10:42, 46.7kB/s]
     31%|###1      | 12.9M/41.5M [03:36<12:52, 38.8kB/s]
     31%|###1      | 12.9M/41.5M [03:36<12:15, 40.8kB/s]
     31%|###1      | 12.9M/41.5M [03:37<11:44, 42.6kB/s]
     31%|###1      | 12.9M/41.5M [03:37<11:20, 44.0kB/s]
     31%|###1      | 12.9M/41.5M [03:37<11:02, 45.2kB/s]
     31%|###1      | 12.9M/41.5M [03:37<11:16, 44.3kB/s]
     31%|###1      | 12.9M/41.5M [03:37<08:23, 59.4kB/s]
     31%|###1      | 12.9M/41.5M [03:38<08:47, 56.7kB/s]
     31%|###1      | 13.0M/41.5M [03:38<11:33, 43.1kB/s]
     31%|###1      | 13.0M/41.5M [03:38<11:13, 44.4kB/s]
     31%|###1      | 13.0M/41.5M [03:38<10:57, 45.5kB/s]
     31%|###1      | 13.0M/41.5M [03:39<13:34, 36.7kB/s]
 
     31%|###1      | 13.0M/41.5M [03:39<15:30, 32.1kB/s]
     31%|###1      | 13.0M/41.5M [03:39<10:52, 45.8kB/s]
     31%|###1      | 13.0M/41.5M [03:39<11:19, 43.9kB/s]
     31%|###1      | 13.0M/41.5M [03:39<11:02, 45.1kB/s]
     31%|###1      | 13.0M/41.5M [03:40<10:49, 46.0kB/s]
     31%|###1      | 13.0M/41.5M [03:40<10:39, 46.7kB/s]
     31%|###1      | 13.0M/41.5M [03:40<09:48, 50.7kB/s]
     31%|###1      | 13.0M/41.5M [03:40<09:55, 50.1kB/s]
     31%|###1      | 13.1M/41.5M [03:40<10:00, 49.7kB/s]
     32%|###1      | 13.1M/41.5M [03:41<10:40, 46.5kB/s]
     32%|###1      | 13.1M/41.5M [03:41<07:59, 62.2kB/s]
     32%|###1      | 13.1M/41.5M [03:41<08:28, 58.6kB/s]
     32%|###1      | 13.1M/41.5M [03:41<08:51, 56.0kB/s]
     32%|###1      | 13.1M/41.5M [03:41<09:11, 53.9kB/s]
     32%|###1      | 13.1M/41.5M [03:41<07:28, 66.3kB/s]
     32%|###1      | 13.1M/41.5M [03:42<08:05, 61.3kB/s]
     32%|###1      | 13.1M/41.5M [03:42<09:26, 52.5kB/s]
     32%|###1      | 13.2M/41
 .5M [03:42<07:23, 67.0kB/s]
     32%|###1      | 13.2M/41.5M [03:42<07:57, 62.2kB/s]
     32%|###1      | 13.2M/41.5M [03:42<08:27, 58.5kB/s]
     32%|###1      | 13.2M/41.5M [03:43<08:52, 55.7kB/s]
     32%|###1      | 13.2M/41.5M [03:43<09:12, 53.7kB/s]
     32%|###1      | 13.2M/41.5M [03:43<07:26, 66.4kB/s]
     32%|###1      | 13.2M/41.5M [03:43<08:04, 61.2kB/s]
     32%|###1      | 13.2M/41.5M [03:43<06:52, 71.8kB/s]
     32%|###1      | 13.2M/41.5M [03:43<07:35, 65.0kB/s]
     32%|###1      | 13.3M/41.5M [03:44<06:36, 74.6kB/s]
     32%|###1      | 13.3M/41.5M [03:44<07:22, 66.9kB/s]
     32%|###2      | 13.3M/41.5M [03:44<06:29, 76.0kB/s]
     32%|###2      | 13.3M/41.5M [03:44<05:58, 82.4kB/s]
     32%|###2      | 13.3M/41.5M [03:44<05:40, 86.8kB/s]
     32%|###2      | 13.3M/41.5M [03:44<05:28, 90.0kB/s]
     32%|###2      | 13.3M/41.5M [03:45<05:20, 92.2kB/s]
     32%|###2      | 13.4M/41.5M [03:45<06:49, 72.1kB/s]
     32%|###2      | 13.4M/41.5M [03:45<06:16, 78.2kB/s]
      32%|###2      | 13.4M/41.5M [03:46<07:47, 63.0kB/s]
     32%|###2      | 13.4M/41.5M [03:46<07:02, 69.8kB/s]
     32%|###2      | 13.4M/41.5M [03:46<07:31, 65.1kB/s]
     32%|###2      | 13.4M/41.5M [03:46<06:44, 72.8kB/s]
     32%|###2      | 13.4M/41.5M [03:46<07:21, 66.7kB/s]
     32%|###2      | 13.5M/41.5M [03:46<07:55, 61.9kB/s]
     32%|###2      | 13.5M/41.5M [03:47<06:50, 71.5kB/s]
     32%|###2      | 13.5M/41.5M [03:47<07:31, 65.1kB/s]
     33%|###2      | 13.5M/41.5M [03:47<08:06, 60.4kB/s]
     33%|###2      | 13.5M/41.5M [03:47<06:53, 71.1kB/s]
     33%|###2      | 13.5M/41.5M [03:47<09:48, 49.9kB/s]
     33%|###2      | 13.5M/41.5M [03:48<06:39, 73.4kB/s]
     33%|###2      | 13.5M/41.5M [03:48<06:31, 74.8kB/s]
     33%|###2      | 13.6M/41.5M [03:48<08:47, 55.5kB/s]
     33%|###2      | 13.6M/41.5M [03:49<09:10, 53.2kB/s]
     33%|###2      | 13.6M/41.5M [03:49<07:54, 61.7kB/s]
     33%|###2      | 13.6M/41.5M [03:49<08:16, 58.9kB/s]
     33%|###2      | 13.6M/4
 1.5M [03:49<08:37, 56.5kB/s]
     33%|###2      | 13.6M/41.5M [03:49<11:11, 43.5kB/s]
     33%|###2      | 13.6M/41.5M [03:50<10:54, 44.7kB/s]
     33%|###2      | 13.6M/41.5M [03:50<08:58, 54.3kB/s]
     33%|###2      | 13.6M/41.5M [03:50<13:11, 36.9kB/s]
     33%|###2      | 13.7M/41.5M [03:51<10:26, 46.5kB/s]
     33%|###2      | 13.7M/41.5M [03:51<10:20, 47.0kB/s]
     33%|###2      | 13.7M/41.5M [03:51<12:38, 38.5kB/s]
     33%|###2      | 13.7M/41.5M [03:51<11:57, 40.6kB/s]
     33%|###3      | 13.7M/41.5M [03:52<11:07, 43.7kB/s]
     33%|###3      | 13.7M/41.5M [03:52<08:49, 55.0kB/s]
     33%|###3      | 13.7M/41.5M [03:52<09:03, 53.5kB/s]
     33%|###3      | 13.7M/41.5M [03:52<11:34, 41.9kB/s]
     33%|###3      | 13.8M/41.5M [03:53<09:07, 53.1kB/s]
     33%|###3      | 13.8M/41.5M [03:53<09:16, 52.2kB/s]
     33%|###3      | 13.8M/41.5M [03:53<13:42, 35.3kB/s]
     33%|###3      | 13.8M/41.5M [03:54<12:16, 39.4kB/s]
     33%|###3      | 13.8M/41.5M [03:54<12:33, 38.6kB/s]
 
     33%|###3      | 13.8M/41.5M [03:54<18:04, 26.8kB/s]
     33%|###3      | 13.8M/41.5M [03:55<13:33, 35.7kB/s]
     33%|###3      | 13.8M/41.5M [03:55<14:57, 32.3kB/s]
     33%|###3      | 13.8M/41.5M [03:55<13:44, 35.2kB/s]
     33%|###3      | 13.8M/41.5M [03:55<15:18, 31.6kB/s]
     33%|###3      | 13.9M/41.5M [03:56<10:56, 44.2kB/s]
     33%|###3      | 13.9M/41.5M [03:56<10:41, 45.1kB/s]
     33%|###3      | 13.9M/41.5M [03:56<12:58, 37.2kB/s]
     33%|###3      | 13.9M/41.5M [03:56<12:51, 37.5kB/s]
     33%|###3      | 13.9M/41.5M [03:57<12:02, 40.0kB/s]
     33%|###3      | 13.9M/41.5M [03:57<11:26, 42.1kB/s]
     34%|###3      | 13.9M/41.5M [03:57<11:00, 43.8kB/s]
     34%|###3      | 13.9M/41.5M [03:57<10:40, 45.1kB/s]
     34%|###3      | 13.9M/41.5M [03:57<13:21, 36.1kB/s]
     34%|###3      | 13.9M/41.5M [03:58<12:27, 38.7kB/s]
     34%|###3      | 13.9M/41.5M [03:58<11:49, 40.7kB/s]
     34%|###3      | 14.0M/41.5M [03:58<09:00, 53.4kB/s]
     34%|###3      | 14.0M/
 41.5M [03:58<09:12, 52.3kB/s]
     34%|###3      | 14.0M/41.5M [03:58<09:21, 51.3kB/s]
     34%|###3      | 14.0M/41.5M [03:59<09:30, 50.6kB/s]
     34%|###3      | 14.0M/41.5M [03:59<09:36, 50.0kB/s]
     34%|###3      | 14.0M/41.5M [03:59<09:40, 49.6kB/s]
     34%|###3      | 14.0M/41.5M [03:59<12:32, 38.3kB/s]
     34%|###3      | 14.0M/41.5M [03:59<09:07, 52.6kB/s]
     34%|###3      | 14.0M/41.5M [04:00<09:18, 51.6kB/s]
     34%|###3      | 14.0M/41.5M [04:00<09:26, 50.8kB/s]
     34%|###3      | 14.1M/41.5M [04:00<07:31, 63.7kB/s]
     34%|###3      | 14.1M/41.5M [04:00<10:22, 46.2kB/s]
     34%|###3      | 14.1M/41.5M [04:00<08:11, 58.5kB/s]
     34%|###3      | 14.1M/41.5M [04:01<08:33, 56.0kB/s]
     34%|###3      | 14.1M/41.5M [04:01<11:13, 42.6kB/s]
     34%|###4      | 14.1M/41.5M [04:02<13:09, 36.4kB/s]
     34%|###4      | 14.1M/41.5M [04:02<12:28, 38.3kB/s]
     34%|###4      | 14.1M/41.5M [04:02<13:20, 35.8kB/s]
     34%|###4      | 14.1M/41.5M [04:02<13:13, 36.1kB/s
 ]
     34%|###4      | 14.1M/41.5M [04:03<12:22, 38.6kB/s]
     34%|###4      | 14.2M/41.5M [04:03<11:42, 40.8kB/s]
     34%|###4      | 14.2M/41.5M [04:03<11:11, 42.6kB/s]
     34%|###4      | 14.2M/41.5M [04:03<10:48, 44.2kB/s]
     34%|###4      | 14.2M/41.5M [04:03<10:31, 45.4kB/s]
     34%|###4      | 14.2M/41.5M [04:03<09:26, 50.5kB/s]
     34%|###4      | 14.2M/41.5M [04:04<09:32, 50.0kB/s]
     34%|###4      | 14.2M/41.5M [04:04<08:06, 58.8kB/s]
     34%|###4      | 14.2M/41.5M [04:04<10:56, 43.6kB/s]
     34%|###4      | 14.2M/41.5M [04:04<07:24, 64.2kB/s]
     34%|###4      | 14.2M/41.5M [04:05<09:47, 48.6kB/s]
     34%|###4      | 14.3M/41.5M [04:05<11:24, 41.7kB/s]
     34%|###4      | 14.3M/41.5M [04:05<10:37, 44.8kB/s]
     34%|###4      | 14.3M/41.5M [04:06<09:49, 48.4kB/s]
     34%|###4      | 14.3M/41.5M [04:06<09:48, 48.4kB/s]
     34%|###4      | 14.3M/41.5M [04:06<09:47, 48.5kB/s]
     35%|###4      | 14.3M/41.5M [04:06<09:47, 48.5kB/s]
     35%|###4      | 14.3M
 /41.5M [04:06<09:46, 48.6kB/s]
     35%|###4      | 14.3M/41.5M [04:07<09:45, 48.6kB/s]
     35%|###4      | 14.3M/41.5M [04:07<09:45, 48.6kB/s]
     35%|###4      | 14.4M/41.5M [04:07<09:45, 48.6kB/s]
     35%|###4      | 14.4M/41.5M [04:07<07:35, 62.5kB/s]
     35%|###4      | 14.4M/41.5M [04:07<08:06, 58.5kB/s]
     35%|###4      | 14.4M/41.5M [04:07<06:46, 69.9kB/s]
     35%|###4      | 14.4M/41.5M [04:08<09:38, 49.1kB/s]
     35%|###4      | 14.4M/41.5M [04:08<06:29, 72.9kB/s]
     35%|###4      | 14.4M/41.5M [04:08<05:58, 79.1kB/s]
     35%|###4      | 14.5M/41.5M [04:08<05:37, 84.0kB/s]
     35%|###4      | 14.5M/41.5M [04:09<06:52, 68.7kB/s]
     35%|###4      | 14.5M/41.5M [04:09<06:15, 75.5kB/s]
     35%|###4      | 14.5M/41.5M [04:09<05:49, 80.9kB/s]
     35%|###4      | 14.5M/41.5M [04:09<08:27, 55.7kB/s]
     35%|###5      | 14.5M/41.5M [04:10<12:27, 37.8kB/s]
     35%|###5      | 14.6M/41.5M [04:10<06:33, 71.7kB/s]
     35%|###5      | 14.6M/41.5M [04:10<06:08, 76.6kB/
 s]
     35%|###5      | 14.6M/41.5M [04:10<05:48, 81.0kB/s]
     35%|###5      | 14.6M/41.5M [04:11<06:50, 68.7kB/s]
     35%|###5      | 14.6M/41.5M [04:11<05:30, 85.2kB/s]
     35%|###5      | 14.6M/41.5M [04:11<05:20, 87.9kB/s]
     35%|###5      | 14.7M/41.5M [04:11<06:29, 72.3kB/s]
     35%|###5      | 14.7M/41.5M [04:12<06:01, 77.8kB/s]
     35%|###5      | 14.7M/41.5M [04:12<07:02, 66.5kB/s]
     35%|###5      | 14.7M/41.5M [04:12<09:06, 51.4kB/s]
     35%|###5      | 14.7M/41.5M [04:12<07:42, 60.7kB/s]
     35%|###5      | 14.7M/41.5M [04:13<08:03, 58.0kB/s]
     36%|###5      | 14.7M/41.5M [04:13<08:23, 55.8kB/s]
     36%|###5      | 14.7M/41.5M [04:13<08:40, 53.9kB/s]
     36%|###5      | 14.8M/41.5M [04:13<07:07, 65.6kB/s]
     36%|###5      | 14.8M/41.5M [04:13<07:40, 60.9kB/s]
     36%|###5      | 14.8M/41.5M [04:13<06:33, 71.2kB/s]
     36%|###5      | 14.8M/41.5M [04:14<07:12, 64.7kB/s]
     36%|###5      | 14.8M/41.5M [04:14<06:16, 74.2kB/s]
     36%|###5      | 14.8
 M/41.5M [04:14<06:59, 66.7kB/s]
     36%|###5      | 14.8M/41.5M [04:14<06:35, 70.7kB/s]
     36%|###5      | 14.8M/41.5M [04:14<05:57, 78.2kB/s]
     36%|###5      | 14.9M/41.5M [04:15<05:33, 83.7kB/s]
     36%|###5      | 14.9M/41.5M [04:15<06:51, 67.9kB/s]
     36%|###5      | 14.9M/41.5M [04:15<05:21, 86.8kB/s]
     36%|###5      | 14.9M/41.5M [04:15<06:32, 71.1kB/s]
     36%|###5      | 14.9M/41.5M [04:16<06:01, 77.0kB/s]
     36%|###6      | 14.9M/41.5M [04:16<07:02, 65.9kB/s]
     36%|###6      | 15.0M/41.5M [04:16<08:49, 52.5kB/s]
     36%|###6      | 15.0M/41.5M [04:17<09:21, 49.6kB/s]
     36%|###6      | 15.0M/41.5M [04:17<09:24, 49.2kB/s]
     36%|###6      | 15.0M/41.5M [04:17<07:55, 58.4kB/s]
     36%|###6      | 15.0M/41.5M [04:18<11:45, 39.3kB/s]
     36%|###6      | 15.0M/41.5M [04:18<11:19, 40.9kB/s]
     36%|###6      | 15.0M/41.5M [04:19<18:07, 25.5kB/s]
     36%|###6      | 15.0M/41.5M [04:19<16:12, 28.5kB/s]
     36%|###6      | 15.0M/41.5M [04:19<16:52, 27.4kB
 /s]
     36%|###6      | 15.1M/41.5M [04:19<15:29, 29.8kB/s]
     36%|###6      | 15.1M/41.5M [04:20<13:53, 33.3kB/s]
     36%|###6      | 15.1M/41.5M [04:20<12:40, 36.4kB/s]
     36%|###6      | 15.1M/41.5M [04:20<11:45, 39.2kB/s]
     36%|###6      | 15.1M/41.5M [04:20<11:06, 41.5kB/s]
     36%|###6      | 15.1M/41.5M [04:20<10:37, 43.4kB/s]
     36%|###6      | 15.1M/41.5M [04:21<13:05, 35.2kB/s]
     36%|###6      | 15.1M/41.5M [04:21<12:00, 38.4kB/s]
     36%|###6      | 15.1M/41.5M [04:21<08:40, 53.1kB/s]
     36%|###6      | 15.1M/41.5M [04:21<08:52, 51.9kB/s]
     36%|###6      | 15.1M/41.5M [04:21<09:01, 51.0kB/s]
     37%|###6      | 15.1M/41.5M [04:21<09:08, 50.4kB/s]
     37%|###6      | 15.2M/41.5M [04:22<09:13, 49.9kB/s]
     37%|###6      | 15.2M/41.5M [04:22<07:13, 63.6kB/s]
     37%|###6      | 15.2M/41.5M [04:22<07:45, 59.2kB/s]
     37%|###6      | 15.2M/41.5M [04:22<06:31, 70.4kB/s]
     37%|###6      | 15.2M/41.5M [04:22<07:10, 64.0kB/s]
     37%|###6      | 15.
 2M/41.5M [04:23<08:03, 56.9kB/s]
     37%|###6      | 15.2M/41.5M [04:23<06:51, 66.9kB/s]
     37%|###6      | 15.2M/41.5M [04:23<06:31, 70.3kB/s]
     37%|###6      | 15.3M/41.5M [04:23<06:37, 69.1kB/s]
     37%|###6      | 15.3M/41.5M [04:23<07:44, 59.2kB/s]
     37%|###6      | 15.3M/41.5M [04:24<06:07, 74.7kB/s]
     37%|###6      | 15.3M/41.5M [04:24<07:42, 59.4kB/s]
     37%|###6      | 15.3M/41.5M [04:24<06:41, 68.4kB/s]
     37%|###6      | 15.3M/41.5M [04:24<07:12, 63.4kB/s]
     37%|###6      | 15.3M/41.5M [04:24<07:41, 59.5kB/s]
     37%|###6      | 15.3M/41.5M [04:25<10:18, 44.3kB/s]
     37%|###7      | 15.4M/41.5M [04:25<08:02, 56.8kB/s]
     37%|###7      | 15.4M/41.5M [04:25<08:20, 54.8kB/s]
     37%|###7      | 15.4M/41.5M [04:25<10:51, 42.0kB/s]
     37%|###7      | 15.4M/41.5M [04:26<15:17, 29.8kB/s]
     37%|###7      | 15.4M/41.5M [04:26<19:15, 23.7kB/s]
     37%|###7      | 15.4M/41.5M [04:27<19:06, 23.9kB/s]
     37%|###7      | 15.4M/41.5M [04:27<18:59, 24.0k
 B/s]
     37%|###7      | 15.4M/41.5M [04:27<16:12, 28.1kB/s]
     37%|###7      | 15.4M/41.5M [04:27<14:12, 32.1kB/s]
     37%|###7      | 15.4M/41.5M [04:28<15:32, 29.3kB/s]
     37%|###7      | 15.4M/41.5M [04:28<13:42, 33.2kB/s]
     37%|###7      | 15.4M/41.5M [04:28<12:24, 36.7kB/s]
     37%|###7      | 15.5M/41.5M [04:28<08:51, 51.3kB/s]
     37%|###7      | 15.5M/41.5M [04:28<08:58, 50.7kB/s]
     37%|###7      | 15.5M/41.5M [04:29<09:04, 50.1kB/s]
     37%|###7      | 15.5M/41.5M [04:29<07:11, 63.2kB/s]
     37%|###7      | 15.5M/41.5M [04:29<07:41, 59.1kB/s]
     37%|###7      | 15.5M/41.5M [04:29<06:28, 70.1kB/s]
     37%|###7      | 15.5M/41.5M [04:29<07:30, 60.4kB/s]
     37%|###7      | 15.5M/41.5M [04:30<05:56, 76.4kB/s]
     37%|###7      | 15.6M/41.5M [04:30<06:05, 74.4kB/s]
     38%|###7      | 15.6M/41.5M [04:30<08:34, 52.9kB/s]
     38%|###7      | 15.6M/41.5M [04:30<06:26, 70.3kB/s]
     38%|###7      | 15.6M/41.5M [04:31<07:18, 61.9kB/s]
     38%|###7      | 15
 .6M/41.5M [04:31<06:29, 69.6kB/s]
     38%|###7      | 15.6M/41.5M [04:31<06:59, 64.7kB/s]
     38%|###7      | 15.6M/41.5M [04:32<15:22, 29.4kB/s]
     38%|###7      | 15.7M/41.5M [04:32<09:23, 48.1kB/s]
     38%|###7      | 15.7M/41.5M [04:32<09:21, 48.2kB/s]
     38%|###7      | 15.7M/41.5M [04:33<11:00, 41.0kB/s]
     38%|###7      | 15.7M/41.5M [04:33<08:50, 51.0kB/s]
     38%|###7      | 15.7M/41.5M [04:33<08:55, 50.5kB/s]
     38%|###7      | 15.7M/41.5M [04:33<08:59, 50.1kB/s]
     38%|###7      | 15.7M/41.5M [04:33<09:03, 49.8kB/s]
     38%|###7      | 15.7M/41.5M [04:34<09:07, 49.3kB/s]
     38%|###7      | 15.7M/41.5M [04:34<09:09, 49.2kB/s]
     38%|###7      | 15.8M/41.5M [04:34<09:10, 49.0kB/s]
     38%|###7      | 15.8M/41.5M [04:34<09:11, 48.9kB/s]
     38%|###7      | 15.8M/41.5M [04:34<09:56, 45.2kB/s]
     38%|###8      | 15.8M/41.5M [04:35<08:58, 50.0kB/s]
     38%|###8      | 15.8M/41.5M [04:35<11:40, 38.5kB/s]
     38%|###8      | 15.8M/41.5M [04:35<13:37, 33.0
 kB/s]
     38%|###8      | 15.8M/41.5M [04:35<11:31, 39.0kB/s]
     38%|###8      | 15.8M/41.5M [04:36<14:22, 31.2kB/s]
     38%|###8      | 15.8M/41.5M [04:36<15:34, 28.8kB/s]
     38%|###8      | 15.8M/41.5M [04:36<16:25, 27.3kB/s]
     38%|###8      | 15.8M/41.5M [04:37<18:08, 24.7kB/s]
     38%|###8      | 15.8M/41.5M [04:38<24:23, 18.4kB/s]
     38%|###8      | 15.8M/41.5M [04:38<22:36, 19.8kB/s]
     38%|###8      | 15.9M/41.5M [04:38<18:35, 24.1kB/s]
     38%|###8      | 15.9M/41.5M [04:38<18:31, 24.2kB/s]
     38%|###8      | 15.9M/41.5M [04:39<15:44, 28.5kB/s]
     38%|###8      | 15.9M/41.5M [04:39<13:46, 32.5kB/s]
     38%|###8      | 15.9M/41.5M [04:39<12:23, 36.1kB/s]
     38%|###8      | 15.9M/41.5M [04:39<11:26, 39.1kB/s]
     38%|###8      | 15.9M/41.5M [04:39<10:45, 41.6kB/s]
     38%|###8      | 15.9M/41.5M [04:39<07:54, 56.5kB/s]
     38%|###8      | 15.9M/41.5M [04:40<08:13, 54.3kB/s]
     38%|###8      | 15.9M/41.5M [04:40<08:28, 52.7kB/s]
     38%|###8      | 1
 5.9M/41.5M [04:40<11:09, 40.0kB/s]
     38%|###8      | 16.0M/41.5M [04:40<09:48, 45.5kB/s]
     39%|###8      | 16.0M/41.5M [04:41<08:22, 53.2kB/s]
     39%|###8      | 16.0M/41.5M [04:41<08:31, 52.3kB/s]
     39%|###8      | 16.0M/41.5M [04:41<08:39, 51.5kB/s]
     39%|###8      | 16.0M/41.5M [04:41<08:46, 50.8kB/s]
     39%|###8      | 16.0M/41.5M [04:41<08:51, 50.3kB/s]
     39%|###8      | 16.0M/41.5M [04:42<08:56, 49.8kB/s]
     39%|###8      | 16.0M/41.5M [04:42<08:59, 49.5kB/s]
     39%|###8      | 16.0M/41.5M [04:42<12:19, 36.1kB/s]
     39%|###8      | 16.0M/41.5M [04:42<10:20, 43.0kB/s]
     39%|###8      | 16.1M/41.5M [04:43<08:35, 51.8kB/s]
     39%|###8      | 16.1M/41.5M [04:43<08:42, 51.1kB/s]
     39%|###8      | 16.1M/41.5M [04:43<08:12, 54.2kB/s]
     39%|###8      | 16.1M/41.5M [04:43<08:25, 52.7kB/s]
     39%|###8      | 16.1M/41.5M [04:43<08:36, 51.5kB/s]
     39%|###8      | 16.1M/41.5M [04:43<08:04, 54.9kB/s]
     39%|###8      | 16.1M/41.5M [04:44<09:03, 48.
 9kB/s]
     39%|###8      | 16.1M/41.5M [04:44<08:30, 52.1kB/s]
     39%|###8      | 16.1M/41.5M [04:44<08:39, 51.2kB/s]
     39%|###8      | 16.1M/41.5M [04:44<08:46, 50.5kB/s]
     39%|###8      | 16.1M/41.5M [04:44<08:51, 50.0kB/s]
     39%|###8      | 16.2M/41.5M [04:45<11:29, 38.5kB/s]
     39%|###8      | 16.2M/41.5M [04:45<08:23, 52.7kB/s]
     39%|###8      | 16.2M/41.5M [04:45<08:33, 51.7kB/s]
     39%|###9      | 16.2M/41.5M [04:45<11:01, 40.1kB/s]
     39%|###9      | 16.2M/41.5M [04:46<10:11, 43.3kB/s]
     39%|###9      | 16.2M/41.5M [04:46<09:56, 44.5kB/s]
     39%|###9      | 16.2M/41.5M [04:46<09:43, 45.4kB/s]
     39%|###9      | 16.2M/41.5M [04:46<09:32, 46.2kB/s]
     39%|###9      | 16.2M/41.5M [04:46<09:24, 46.9kB/s]
     39%|###9      | 16.2M/41.5M [04:47<09:18, 47.4kB/s]
     39%|###9      | 16.2M/41.5M [04:47<11:50, 37.3kB/s]
     39%|###9      | 16.3M/41.5M [04:47<08:32, 51.6kB/s]
     39%|###9      | 16.3M/41.5M [04:47<08:40, 50.8kB/s]
     39%|###9      | 
 16.3M/41.5M [04:47<08:45, 50.3kB/s]
     39%|###9      | 16.3M/41.5M [04:48<08:50, 49.8kB/s]
     39%|###9      | 16.3M/41.5M [04:48<06:57, 63.2kB/s]
     39%|###9      | 16.3M/41.5M [04:48<07:27, 59.0kB/s]
     39%|###9      | 16.3M/41.5M [04:48<07:51, 56.0kB/s]
     39%|###9      | 16.3M/41.5M [04:48<06:27, 68.1kB/s]
     39%|###9      | 16.3M/41.5M [04:48<07:02, 62.4kB/s]
     39%|###9      | 16.4M/41.5M [04:49<06:02, 72.7kB/s]
     39%|###9      | 16.4M/41.5M [04:49<05:29, 79.9kB/s]
     39%|###9      | 16.4M/41.5M [04:49<06:12, 70.6kB/s]
     40%|###9      | 16.4M/41.5M [04:49<05:34, 78.6kB/s]
     40%|###9      | 16.4M/41.5M [04:49<05:12, 84.2kB/s]
     40%|###9      | 16.4M/41.5M [04:49<04:58, 88.1kB/s]
     40%|###9      | 16.5M/41.5M [04:50<04:09, 105kB/s] 
     40%|###9      | 16.5M/41.5M [04:50<04:14, 103kB/s]
     40%|###9      | 16.5M/41.5M [04:50<05:36, 77.8kB/s]
     40%|###9      | 16.5M/41.5M [04:50<05:36, 78.0kB/s]
     40%|###9      | 16.5M/41.5M [04:51<05:16, 82.
 7kB/s]
     40%|###9      | 16.5M/41.5M [04:51<10:36, 41.1kB/s]
     40%|###9      | 16.5M/41.5M [04:52<08:46, 49.7kB/s]
     40%|###9      | 16.6M/41.5M [04:52<08:48, 49.5kB/s]
     40%|###9      | 16.6M/41.5M [04:52<08:50, 49.3kB/s]
     40%|###9      | 16.6M/41.5M [04:52<08:51, 49.2kB/s]
     40%|###9      | 16.6M/41.5M [04:52<07:09, 60.9kB/s]
     40%|###9      | 16.6M/41.5M [04:52<07:32, 57.7kB/s]
     40%|####      | 16.6M/41.5M [04:53<06:21, 68.4kB/s]
     40%|####      | 16.6M/41.5M [04:53<06:54, 62.9kB/s]
     40%|####      | 16.6M/41.5M [04:53<05:58, 72.7kB/s]
     40%|####      | 16.6M/41.5M [04:53<06:36, 65.7kB/s]
     40%|####      | 16.7M/41.5M [04:53<05:47, 75.0kB/s]
     40%|####      | 16.7M/41.5M [04:53<05:19, 81.5kB/s]
     40%|####      | 16.7M/41.5M [04:54<05:01, 86.2kB/s]
     40%|####      | 16.7M/41.5M [04:54<04:50, 89.5kB/s]
     40%|####      | 16.7M/41.5M [04:54<04:42, 91.8kB/s]
     40%|####      | 16.7M/41.5M [04:54<04:37, 93.5kB/s]
     40%|####      | 
 16.8M/41.5M [04:54<04:34, 94.6kB/s]
     40%|####      | 16.8M/41.5M [04:54<04:52, 88.5kB/s]
     40%|####      | 16.8M/41.5M [04:55<04:24, 98.1kB/s]
     40%|####      | 16.8M/41.5M [04:55<04:24, 97.8kB/s]
     41%|####      | 16.8M/41.5M [04:55<04:24, 97.7kB/s]
     41%|####      | 16.8M/41.5M [04:55<06:10, 69.8kB/s]
     41%|####      | 16.9M/41.5M [04:56<03:53, 111kB/s] 
     41%|####      | 16.9M/41.5M [04:56<03:59, 108kB/s]
     41%|####      | 16.9M/41.5M [04:56<04:05, 105kB/s]
     41%|####      | 16.9M/41.5M [04:56<04:09, 103kB/s]
     41%|####      | 16.9M/41.5M [04:57<05:38, 76.1kB/s]
     41%|####      | 17.0M/41.5M [04:57<06:25, 66.7kB/s]
     41%|####      | 17.0M/41.5M [04:57<04:33, 93.9kB/s]
     41%|####1     | 17.0M/41.5M [04:57<04:47, 89.4kB/s]
     41%|####1     | 17.0M/41.5M [04:58<04:41, 91.1kB/s]
     41%|####1     | 17.0M/41.5M [04:58<05:25, 78.6kB/s]
     41%|####1     | 17.1M/41.5M [04:58<05:09, 82.9kB/s]
     41%|####1     | 17.1M/41.5M [04:58<04:56, 86.4k
 B/s]
     41%|####1     | 17.1M/41.5M [04:58<05:06, 83.4kB/s]
     41%|####1     | 17.1M/41.5M [04:59<04:53, 87.0kB/s]
     41%|####1     | 17.1M/41.5M [04:59<06:25, 66.3kB/s]
     41%|####1     | 17.1M/41.5M [04:59<05:04, 83.9kB/s]
     41%|####1     | 17.2M/41.5M [04:59<04:52, 87.1kB/s]
     41%|####1     | 17.2M/41.5M [04:59<05:04, 83.7kB/s]
     41%|####1     | 17.2M/41.5M [05:00<05:45, 73.7kB/s]
     41%|####1     | 17.2M/41.5M [05:00<05:21, 79.3kB/s]
     42%|####1     | 17.2M/41.5M [05:00<05:03, 83.8kB/s]
     42%|####1     | 17.2M/41.5M [05:00<04:51, 87.3kB/s]
     42%|####1     | 17.3M/41.5M [05:00<04:42, 90.0kB/s]
     42%|####1     | 17.3M/41.5M [05:01<04:35, 92.0kB/s]
     42%|####1     | 17.3M/41.5M [05:01<04:31, 93.6kB/s]
     42%|####1     | 17.3M/41.5M [05:01<04:27, 94.7kB/s]
     42%|####1     | 17.3M/41.5M [05:01<04:25, 95.5kB/s]
     42%|####1     | 17.3M/41.5M [05:01<05:38, 74.9kB/s]
     42%|####1     | 17.4M/41.5M [05:02<05:44, 73.5kB/s]
     42%|####1     | 17
 .4M/41.5M [05:02<06:49, 61.8kB/s]
     42%|####1     | 17.4M/41.5M [05:02<06:32, 64.4kB/s]
     42%|####1     | 17.4M/41.5M [05:03<06:53, 61.1kB/s]
     42%|####1     | 17.4M/41.5M [05:03<07:13, 58.2kB/s]
     42%|####1     | 17.4M/41.5M [05:03<06:20, 66.4kB/s]
     42%|####2     | 17.4M/41.5M [05:03<06:47, 61.8kB/s]
     42%|####2     | 17.4M/41.5M [05:03<07:12, 58.3kB/s]
     42%|####2     | 17.4M/41.5M [05:03<07:21, 57.1kB/s]
     42%|####2     | 17.5M/41.5M [05:04<07:41, 54.6kB/s]
     42%|####2     | 17.5M/41.5M [05:04<06:25, 65.4kB/s]
     42%|####2     | 17.5M/41.5M [05:04<08:45, 47.9kB/s]
     42%|####2     | 17.5M/41.5M [05:04<07:34, 55.3kB/s]
     42%|####2     | 17.5M/41.5M [05:04<06:23, 65.6kB/s]
     42%|####2     | 17.5M/41.5M [05:05<06:50, 61.2kB/s]
     42%|####2     | 17.5M/41.5M [05:05<06:33, 63.9kB/s]
     42%|####2     | 17.5M/41.5M [05:05<05:39, 73.9kB/s]
     42%|####2     | 17.6M/41.5M [05:05<05:43, 73.1kB/s]
     42%|####2     | 17.6M/41.5M [05:05<05:14, 79.8
 kB/s]
     42%|####2     | 17.6M/41.5M [05:05<05:17, 79.0kB/s]
     42%|####2     | 17.6M/41.5M [05:06<07:08, 58.4kB/s]
     42%|####2     | 17.6M/41.5M [05:06<06:59, 59.8kB/s]
     42%|####2     | 17.6M/41.5M [05:06<04:54, 85.1kB/s]
     42%|####2     | 17.6M/41.5M [05:06<06:05, 68.5kB/s]
     43%|####2     | 17.6M/41.5M [05:07<07:41, 54.2kB/s]
     43%|####2     | 17.7M/41.5M [05:07<06:29, 64.1kB/s]
     43%|####2     | 17.7M/41.5M [05:07<08:36, 48.3kB/s]
     43%|####2     | 17.7M/41.5M [05:07<08:35, 48.4kB/s]
     43%|####2     | 17.7M/41.5M [05:07<08:35, 48.5kB/s]
     43%|####2     | 17.7M/41.5M [05:08<08:34, 48.5kB/s]
     43%|####2     | 17.7M/41.5M [05:08<06:45, 61.6kB/s]
     43%|####2     | 17.7M/41.5M [05:08<09:08, 45.4kB/s]
     43%|####2     | 17.7M/41.5M [05:08<09:20, 44.5kB/s]
     43%|####2     | 17.7M/41.5M [05:09<08:38, 48.0kB/s]
     43%|####2     | 17.8M/41.5M [05:09<08:36, 48.2kB/s]
     43%|####2     | 17.8M/41.5M [05:09<08:35, 48.3kB/s]
     43%|####2     | 1
 7.8M/41.5M [05:09<08:34, 48.4kB/s]
     43%|####2     | 17.8M/41.5M [05:09<08:33, 48.4kB/s]
     43%|####2     | 17.8M/41.5M [05:10<10:50, 38.2kB/s]
     43%|####2     | 17.8M/41.5M [05:10<07:58, 52.0kB/s]
     43%|####2     | 17.8M/41.5M [05:10<08:05, 51.1kB/s]
     43%|####2     | 17.8M/41.5M [05:10<10:21, 40.0kB/s]
     43%|####2     | 17.8M/41.5M [05:11<09:34, 43.2kB/s]
     43%|####2     | 17.8M/41.5M [05:11<09:19, 44.4kB/s]
     43%|####3     | 17.8M/41.5M [05:11<09:06, 45.4kB/s]
     43%|####3     | 17.9M/41.5M [05:11<08:56, 46.2kB/s]
     43%|####3     | 17.9M/41.5M [05:12<16:47, 24.6kB/s]
     43%|####3     | 17.9M/41.5M [05:12<10:56, 37.7kB/s]
     43%|####3     | 17.9M/41.5M [05:12<10:44, 38.4kB/s]
     43%|####3     | 17.9M/41.5M [05:13<10:09, 40.6kB/s]
     43%|####3     | 17.9M/41.5M [05:13<09:42, 42.5kB/s]
     43%|####3     | 17.9M/41.5M [05:13<09:22, 44.0kB/s]
     43%|####3     | 17.9M/41.5M [05:13<09:07, 45.2kB/s]
     43%|####3     | 17.9M/41.5M [05:13<08:55, 46.
 1kB/s]
     43%|####3     | 17.9M/41.5M [05:13<08:47, 46.8kB/s]
     43%|####3     | 17.9M/41.5M [05:14<08:12, 50.1kB/s]
     43%|####3     | 17.9M/41.5M [05:14<08:16, 49.7kB/s]
     43%|####3     | 18.0M/41.5M [05:14<06:25, 64.0kB/s]
     43%|####3     | 18.0M/41.5M [05:14<06:55, 59.4kB/s]
     43%|####3     | 18.0M/41.5M [05:14<07:44, 53.1kB/s]
     43%|####3     | 18.0M/41.5M [05:14<06:14, 65.9kB/s]
     43%|####3     | 18.0M/41.5M [05:15<06:22, 64.4kB/s]
     43%|####3     | 18.0M/41.5M [05:15<09:00, 45.6kB/s]
     43%|####3     | 18.0M/41.5M [05:15<07:21, 55.7kB/s]
     43%|####3     | 18.0M/41.5M [05:15<06:10, 66.4kB/s]
     43%|####3     | 18.0M/41.5M [05:15<06:38, 61.7kB/s]
     44%|####3     | 18.1M/41.5M [05:16<08:59, 45.5kB/s]
     44%|####3     | 18.1M/41.5M [05:16<07:05, 57.7kB/s]
     44%|####3     | 18.1M/41.5M [05:16<07:52, 52.0kB/s]
     44%|####3     | 18.1M/41.5M [05:16<09:28, 43.1kB/s]
     44%|####3     | 18.1M/41.5M [05:17<08:51, 46.2kB/s]
     44%|####3     | 
 18.1M/41.5M [05:17<08:43, 46.8kB/s]
     44%|####3     | 18.1M/41.5M [05:17<08:38, 47.3kB/s]
     44%|####3     | 18.1M/41.5M [05:17<10:56, 37.3kB/s]
     44%|####3     | 18.1M/41.5M [05:17<07:55, 51.5kB/s]
     44%|####3     | 18.1M/41.5M [05:18<08:02, 50.8kB/s]
     44%|####3     | 18.1M/41.5M [05:18<08:07, 50.2kB/s]
     44%|####3     | 18.2M/41.5M [05:18<10:27, 39.0kB/s]
     44%|####3     | 18.2M/41.5M [05:18<08:01, 50.8kB/s]
     44%|####3     | 18.2M/41.5M [05:18<08:06, 50.2kB/s]
     44%|####3     | 18.2M/41.5M [05:19<08:10, 49.8kB/s]
     44%|####3     | 18.2M/41.5M [05:19<08:49, 46.2kB/s]
     44%|####3     | 18.2M/41.5M [05:19<06:49, 59.7kB/s]
     44%|####3     | 18.2M/41.5M [05:19<08:02, 50.6kB/s]
     44%|####3     | 18.2M/41.5M [05:19<08:07, 50.1kB/s]
     44%|####3     | 18.2M/41.5M [05:20<06:59, 58.1kB/s]
     44%|####3     | 18.2M/41.5M [05:20<07:17, 55.7kB/s]
     44%|####4     | 18.3M/41.5M [05:20<09:00, 45.1kB/s]
     44%|####4     | 18.3M/41.5M [05:20<07:31, 53
 .9kB/s]
     44%|####4     | 18.3M/41.5M [05:20<07:42, 52.6kB/s]
     44%|####4     | 18.3M/41.5M [05:21<07:51, 51.6kB/s]
     44%|####4     | 18.3M/41.5M [05:21<06:20, 63.9kB/s]
     44%|####4     | 18.3M/41.5M [05:21<06:47, 59.7kB/s]
     44%|####4     | 18.3M/41.5M [05:21<09:12, 44.0kB/s]
     44%|####4     | 18.3M/41.5M [05:22<09:15, 43.7kB/s]
     44%|####4     | 18.4M/41.5M [05:22<08:54, 45.4kB/s]
     44%|####4     | 18.4M/41.5M [05:22<10:30, 38.4kB/s]
     44%|####4     | 18.4M/41.5M [05:23<11:54, 33.9kB/s]
     44%|####4     | 18.4M/41.5M [05:23<11:01, 36.7kB/s]
     44%|####4     | 18.4M/41.5M [05:23<15:43, 25.7kB/s]
     44%|####4     | 18.4M/41.5M [05:24<17:39, 22.9kB/s]
     44%|####4     | 18.4M/41.5M [05:24<17:21, 23.3kB/s]
     44%|####4     | 18.4M/41.5M [05:25<17:07, 23.6kB/s]
     44%|####4     | 18.4M/41.5M [05:25<16:57, 23.8kB/s]
     44%|####4     | 18.4M/41.5M [05:25<14:25, 28.0kB/s]
     44%|####4     | 18.4M/41.5M [05:25<12:36, 32.0kB/s]
     44%|####4     |
  18.4M/41.5M [05:25<11:19, 35.6kB/s]
     44%|####4     | 18.4M/41.5M [05:26<10:24, 38.7kB/s]
     44%|####4     | 18.5M/41.5M [05:26<09:46, 41.2kB/s]
     44%|####4     | 18.5M/41.5M [05:26<09:19, 43.2kB/s]
     45%|####4     | 18.5M/41.5M [05:26<06:55, 58.1kB/s]
     45%|####4     | 18.5M/41.5M [05:26<07:15, 55.4kB/s]
     45%|####4     | 18.5M/41.5M [05:26<07:31, 53.5kB/s]
     45%|####4     | 18.5M/41.5M [05:27<06:04, 66.2kB/s]
     45%|####4     | 18.5M/41.5M [05:27<06:34, 61.1kB/s]
     45%|####4     | 18.5M/41.5M [05:27<09:03, 44.3kB/s]
     45%|####4     | 18.5M/41.5M [05:27<06:59, 57.3kB/s]
     45%|####4     | 18.6M/41.5M [05:27<04:59, 80.4kB/s]
     45%|####4     | 18.6M/41.5M [05:28<04:43, 84.7kB/s]
     45%|####4     | 18.6M/41.5M [05:28<04:52, 82.2kB/s]
     45%|####4     | 18.6M/41.5M [05:28<04:38, 86.3kB/s]
     45%|####4     | 18.6M/41.5M [05:28<04:46, 83.8kB/s]
     45%|####4     | 18.6M/41.5M [05:28<04:16, 93.5kB/s]
     45%|####4     | 18.7M/41.5M [05:29<05:27, 7
 3.2kB/s]
     45%|####5     | 18.7M/41.5M [05:29<04:42, 84.7kB/s]
     45%|####5     | 18.7M/41.5M [05:29<04:31, 88.1kB/s]
     45%|####5     | 18.7M/41.5M [05:29<04:23, 90.6kB/s]
     45%|####5     | 18.7M/41.5M [05:29<04:55, 80.9kB/s]
     45%|####5     | 18.7M/41.5M [05:30<04:40, 85.2kB/s]
     45%|####5     | 18.8M/41.5M [05:30<04:29, 88.5kB/s]
     45%|####5     | 18.8M/41.5M [05:30<05:35, 71.0kB/s]
     45%|####5     | 18.8M/41.5M [05:30<04:28, 88.8kB/s]
     45%|####5     | 18.8M/41.5M [05:30<04:21, 90.9kB/s]
     45%|####5     | 18.8M/41.5M [05:31<05:58, 66.4kB/s]
     45%|####5     | 18.8M/41.5M [05:31<05:25, 73.0kB/s]
     45%|####5     | 18.9M/41.5M [05:31<04:55, 80.4kB/s]
     45%|####5     | 18.9M/41.5M [05:31<04:42, 84.1kB/s]
     46%|####5     | 18.9M/41.5M [05:32<04:31, 87.3kB/s]
     46%|####5     | 18.9M/41.5M [05:32<04:23, 89.9kB/s]
     46%|####5     | 18.9M/41.5M [05:32<04:17, 91.9kB/s]
     46%|####5     | 18.9M/41.5M [05:32<04:13, 93.5kB/s]
     46%|####5     
 | 19.0M/41.5M [05:32<04:08, 95.0kB/s]
     46%|####5     | 19.0M/41.5M [05:32<04:07, 95.3kB/s]
     46%|####5     | 19.0M/41.5M [05:33<04:06, 95.9kB/s]
     46%|####5     | 19.0M/41.5M [05:33<04:39, 84.4kB/s]
     46%|####5     | 19.0M/41.5M [05:33<04:28, 87.9kB/s]
     46%|####5     | 19.0M/41.5M [05:33<04:37, 84.9kB/s]
     46%|####5     | 19.0M/41.5M [05:34<06:34, 59.7kB/s]
     46%|####5     | 19.1M/41.5M [05:34<06:08, 63.8kB/s]
     46%|####5     | 19.1M/41.5M [05:34<06:28, 60.5kB/s]
     46%|####5     | 19.1M/41.5M [05:34<06:47, 57.7kB/s]
     46%|####6     | 19.1M/41.5M [05:34<07:04, 55.4kB/s]
     46%|####6     | 19.1M/41.5M [05:35<07:18, 53.6kB/s]
     46%|####6     | 19.1M/41.5M [05:35<06:20, 61.7kB/s]
     46%|####6     | 19.1M/41.5M [05:35<08:32, 45.8kB/s]
     46%|####6     | 19.1M/41.5M [05:35<06:46, 57.7kB/s]
     46%|####6     | 19.1M/41.5M [05:35<07:02, 55.5kB/s]
     46%|####6     | 19.1M/41.5M [05:36<06:49, 57.2kB/s]
     46%|####6     | 19.2M/41.5M [05:36<06:03, 
 64.4kB/s]
     46%|####6     | 19.2M/41.5M [05:36<06:29, 60.0kB/s]
     46%|####6     | 19.2M/41.5M [05:36<06:24, 60.8kB/s]
     46%|####6     | 19.2M/41.5M [05:36<05:48, 67.1kB/s]
     46%|####6     | 19.2M/41.5M [05:36<06:18, 61.8kB/s]
     46%|####6     | 19.2M/41.5M [05:37<06:15, 62.2kB/s]
     46%|####6     | 19.2M/41.5M [05:37<05:42, 68.1kB/s]
     46%|####6     | 19.2M/41.5M [05:37<07:37, 51.0kB/s]
     46%|####6     | 19.3M/41.5M [05:37<06:47, 57.2kB/s]
     46%|####6     | 19.3M/41.5M [05:38<05:53, 65.8kB/s]
     46%|####6     | 19.3M/41.5M [05:38<06:16, 61.8kB/s]
     46%|####6     | 19.3M/41.5M [05:38<06:38, 58.5kB/s]
     47%|####6     | 19.3M/41.5M [05:38<06:56, 55.9kB/s]
     47%|####6     | 19.3M/41.5M [05:38<07:11, 53.9kB/s]
     47%|####6     | 19.3M/41.5M [05:39<09:28, 40.9kB/s]
     47%|####6     | 19.3M/41.5M [05:39<07:07, 54.4kB/s]
     47%|####6     | 19.3M/41.5M [05:39<07:18, 52.9kB/s]
     47%|####6     | 19.3M/41.5M [05:39<09:27, 40.9kB/s]
     47%|####6    
  | 19.4M/41.5M [05:40<12:54, 30.0kB/s]
     47%|####6     | 19.4M/41.5M [05:40<10:46, 35.9kB/s]
     47%|####6     | 19.4M/41.5M [05:41<13:49, 27.9kB/s]
     47%|####6     | 19.4M/41.5M [05:41<14:20, 26.9kB/s]
     47%|####6     | 19.4M/41.5M [05:41<14:44, 26.2kB/s]
     47%|####6     | 19.4M/41.5M [05:42<16:14, 23.8kB/s]
     47%|####6     | 19.4M/41.5M [05:42<13:53, 27.8kB/s]
     47%|####6     | 19.4M/41.5M [05:42<13:12, 29.2kB/s]
     47%|####6     | 19.4M/41.5M [05:43<13:58, 27.6kB/s]
     47%|####6     | 19.4M/41.5M [05:43<12:11, 31.6kB/s]
     47%|####6     | 19.4M/41.5M [05:43<13:15, 29.1kB/s]
     47%|####6     | 19.4M/41.5M [05:43<11:40, 33.0kB/s]
     47%|####6     | 19.5M/41.5M [05:44<12:54, 29.8kB/s]
     47%|####6     | 19.5M/41.5M [05:44<13:46, 27.9kB/s]
     47%|####6     | 19.5M/41.5M [05:44<14:22, 26.8kB/s]
     47%|####6     | 19.5M/41.5M [05:45<12:03, 31.9kB/s]
     47%|####6     | 19.5M/41.5M [05:45<14:56, 25.7kB/s]
     47%|####6     | 19.5M/41.5M [05:45<13:05,
  29.3kB/s]
     47%|####7     | 19.5M/41.5M [05:45<11:40, 32.9kB/s]
     47%|####7     | 19.5M/41.5M [05:46<12:49, 29.9kB/s]
     47%|####7     | 19.5M/41.5M [05:46<11:24, 33.6kB/s]
     47%|####7     | 19.5M/41.5M [05:46<10:23, 36.9kB/s]
     47%|####7     | 19.5M/41.5M [05:47<14:15, 26.9kB/s]
     47%|####7     | 19.6M/41.5M [05:47<09:34, 40.1kB/s]
     47%|####7     | 19.6M/41.5M [05:47<09:09, 41.9kB/s]
     47%|####7     | 19.6M/41.5M [05:47<10:51, 35.3kB/s]
     47%|####7     | 19.6M/41.5M [05:47<10:02, 38.1kB/s]
     47%|####7     | 19.6M/41.5M [05:48<09:26, 40.6kB/s]
     47%|####7     | 19.6M/41.5M [05:48<08:59, 42.6kB/s]
     47%|####7     | 19.6M/41.5M [05:48<13:12, 29.0kB/s]
     47%|####7     | 19.6M/41.5M [05:49<09:30, 40.2kB/s]
     47%|####7     | 19.6M/41.5M [05:49<09:06, 42.0kB/s]
     47%|####7     | 19.6M/41.5M [05:49<08:46, 43.5kB/s]
     47%|####7     | 19.6M/41.5M [05:49<08:31, 44.8kB/s]
     47%|####7     | 19.6M/41.5M [05:49<08:20, 45.8kB/s]
     47%|####7   
   | 19.7M/41.5M [05:49<08:11, 46.6kB/s]
     47%|####7     | 19.7M/41.5M [05:50<08:05, 47.2kB/s]
     47%|####7     | 19.7M/41.5M [05:50<08:00, 47.6kB/s]
     47%|####7     | 19.7M/41.5M [05:50<06:08, 62.0kB/s]
     47%|####7     | 19.7M/41.5M [05:50<06:33, 58.1kB/s]
     48%|####7     | 19.7M/41.5M [05:50<05:27, 69.7kB/s]
     48%|####7     | 19.7M/41.5M [05:50<05:59, 63.4kB/s]
     48%|####7     | 19.7M/41.5M [05:51<05:10, 73.5kB/s]
     48%|####7     | 19.7M/41.5M [05:51<05:44, 66.1kB/s]
     48%|####7     | 19.8M/41.5M [05:51<05:02, 75.4kB/s]
     48%|####7     | 19.8M/41.5M [05:51<07:18, 51.9kB/s]
     48%|####7     | 19.8M/41.5M [05:51<04:20, 87.2kB/s]
     48%|####7     | 19.8M/41.5M [05:52<04:13, 89.7kB/s]
     48%|####7     | 19.8M/41.5M [05:52<08:49, 42.9kB/s]
     48%|####7     | 19.9M/41.5M [05:53<04:09, 91.0kB/s]
     48%|####7     | 19.9M/41.5M [05:53<05:46, 65.3kB/s]
     48%|####8     | 19.9M/41.5M [05:53<04:54, 76.8kB/s]
     48%|####8     | 19.9M/41.5M [05:54<05:28
 , 68.7kB/s]
     48%|####8     | 20.0M/41.5M [05:54<05:06, 73.6kB/s]
     48%|####8     | 20.0M/41.5M [05:54<05:45, 65.2kB/s]
     48%|####8     | 20.0M/41.5M [05:54<05:15, 71.5kB/s]
     48%|####8     | 20.0M/41.5M [05:55<05:55, 63.4kB/s]
     48%|####8     | 20.0M/41.5M [05:55<06:13, 60.3kB/s]
     48%|####8     | 20.0M/41.5M [05:55<06:29, 57.7kB/s]
     48%|####8     | 20.0M/41.5M [05:55<06:45, 55.5kB/s]
     48%|####8     | 20.0M/41.5M [05:56<08:45, 42.8kB/s]
     48%|####8     | 20.1M/41.5M [05:56<08:19, 45.0kB/s]
     48%|####8     | 20.1M/41.5M [05:56<06:42, 55.8kB/s]
     48%|####8     | 20.1M/41.5M [05:56<06:54, 54.2kB/s]
     48%|####8     | 20.1M/41.5M [05:56<07:05, 52.8kB/s]
     48%|####8     | 20.1M/41.5M [05:57<07:13, 51.7kB/s]
     48%|####8     | 20.1M/41.5M [05:57<07:20, 50.9kB/s]
     48%|####8     | 20.1M/41.5M [05:57<05:51, 63.8kB/s]
     49%|####8     | 20.1M/41.5M [05:57<06:16, 59.5kB/s]
     49%|####8     | 20.1M/41.5M [05:57<06:37, 56.4kB/s]
     49%|####8  
    | 20.1M/41.5M [05:57<05:27, 68.3kB/s]
     49%|####8     | 20.2M/41.5M [05:58<05:57, 62.6kB/s]
     49%|####8     | 20.2M/41.5M [05:58<06:37, 56.2kB/s]
     49%|####8     | 20.2M/41.5M [05:58<04:45, 78.2kB/s]
     49%|####8     | 20.2M/41.5M [05:58<04:45, 78.2kB/s]
     49%|####8     | 20.2M/41.5M [05:59<06:29, 57.2kB/s]
     49%|####8     | 20.2M/41.5M [05:59<06:56, 53.4kB/s]
     49%|####8     | 20.3M/41.5M [06:00<06:29, 57.0kB/s]
     49%|####8     | 20.3M/41.5M [06:00<05:34, 66.5kB/s]
     49%|####8     | 20.3M/41.5M [06:00<07:07, 51.9kB/s]
     49%|####8     | 20.3M/41.5M [06:00<07:12, 51.4kB/s]
     49%|####8     | 20.3M/41.5M [06:01<07:16, 50.8kB/s]
     49%|####8     | 20.3M/41.5M [06:01<07:20, 50.4kB/s]
     49%|####9     | 20.3M/41.5M [06:01<07:23, 50.0kB/s]
     49%|####9     | 20.3M/41.5M [06:01<09:19, 39.6kB/s]
     49%|####9     | 20.4M/41.5M [06:01<07:02, 52.4kB/s]
     49%|####9     | 20.4M/41.5M [06:02<07:09, 51.5kB/s]
     49%|####9     | 20.4M/41.5M [06:02<07:1
 5, 50.8kB/s]
     49%|####9     | 20.4M/41.5M [06:02<05:49, 63.3kB/s]
     49%|####9     | 20.4M/41.5M [06:02<06:13, 59.2kB/s]
     49%|####9     | 20.4M/41.5M [06:02<06:33, 56.2kB/s]
     49%|####9     | 20.4M/41.5M [06:02<05:24, 68.0kB/s]
     49%|####9     | 20.4M/41.5M [06:03<05:53, 62.4kB/s]
     49%|####9     | 20.4M/41.5M [06:03<05:03, 72.6kB/s]
     49%|####9     | 20.5M/41.5M [06:03<05:36, 65.6kB/s]
     49%|####9     | 20.5M/41.5M [06:03<06:04, 60.6kB/s]
     49%|####9     | 20.5M/41.5M [06:03<05:08, 71.5kB/s]
     49%|####9     | 20.5M/41.5M [06:04<04:38, 79.2kB/s]
     49%|####9     | 20.5M/41.5M [06:04<05:13, 70.1kB/s]
     49%|####9     | 20.5M/41.5M [06:04<04:41, 78.2kB/s]
     49%|####9     | 20.5M/41.5M [06:04<04:21, 84.0kB/s]
     50%|####9     | 20.5M/41.5M [06:04<05:24, 67.6kB/s]
     50%|####9     | 20.6M/41.5M [06:05<04:12, 87.0kB/s]
     50%|####9     | 20.6M/41.5M [06:05<05:08, 71.0kB/s]
     50%|####9     | 20.6M/41.5M [06:05<04:08, 88.1kB/s]
     50%|####9 
     | 20.6M/41.5M [06:05<04:02, 90.3kB/s]
     50%|####9     | 20.6M/41.5M [06:05<04:12, 86.7kB/s]
     50%|####9     | 20.7M/41.5M [06:06<04:52, 74.7kB/s]
     50%|####9     | 20.7M/41.5M [06:06<06:34, 55.4kB/s]
     50%|####9     | 20.7M/41.5M [06:06<05:38, 64.4kB/s]
     50%|####9     | 20.7M/41.5M [06:06<05:59, 60.7kB/s]
     50%|####9     | 20.7M/41.5M [06:07<07:52, 46.1kB/s]
     50%|####9     | 20.7M/41.5M [06:07<09:07, 39.8kB/s]
     50%|####9     | 20.7M/41.5M [06:07<08:46, 41.4kB/s]
     50%|####9     | 20.7M/41.5M [06:08<08:27, 42.9kB/s]
     50%|####9     | 20.7M/41.5M [06:08<08:12, 44.2kB/s]
     50%|####9     | 20.7M/41.5M [06:08<11:55, 30.4kB/s]
     50%|#####     | 20.8M/41.5M [06:08<08:23, 43.1kB/s]
     50%|#####     | 20.8M/41.5M [06:09<08:10, 44.3kB/s]
     50%|#####     | 20.8M/41.5M [06:09<07:59, 45.3kB/s]
     50%|#####     | 20.8M/41.5M [06:09<09:48, 36.9kB/s]
     50%|#####     | 20.8M/41.5M [06:09<11:11, 32.3kB/s]
     50%|#####     | 20.8M/41.5M [06:10<10:
 07, 35.7kB/s]
     50%|#####     | 20.8M/41.5M [06:10<11:28, 31.5kB/s]
     50%|#####     | 20.8M/41.5M [06:10<10:18, 35.1kB/s]
     50%|#####     | 20.8M/41.5M [06:10<09:27, 38.2kB/s]
     50%|#####     | 20.8M/41.5M [06:10<08:51, 40.8kB/s]
     50%|#####     | 20.8M/41.5M [06:11<08:25, 42.8kB/s]
     50%|#####     | 20.8M/41.5M [06:11<08:07, 44.4kB/s]
     50%|#####     | 20.9M/41.5M [06:11<07:54, 45.6kB/s]
     50%|#####     | 20.9M/41.5M [06:11<07:45, 46.5kB/s]
     50%|#####     | 20.9M/41.5M [06:11<08:02, 44.8kB/s]
     50%|#####     | 20.9M/41.5M [06:12<07:52, 45.7kB/s]
     50%|#####     | 20.9M/41.5M [06:12<06:08, 58.6kB/s]
     50%|#####     | 20.9M/41.5M [06:12<09:48, 36.7kB/s]
     50%|#####     | 20.9M/41.5M [06:13<11:45, 30.6kB/s]
     50%|#####     | 20.9M/41.5M [06:13<10:10, 35.3kB/s]
     50%|#####     | 20.9M/41.5M [06:14<10:17, 34.9kB/s]
     51%|#####     | 21.0M/41.5M [06:14<09:37, 37.3kB/s]
     51%|#####     | 21.0M/41.5M [06:14<10:02, 35.7kB/s]
     51%|#####
      | 21.0M/41.5M [06:14<12:36, 28.4kB/s]
     51%|#####     | 21.0M/41.5M [06:15<10:22, 34.5kB/s]
     51%|#####     | 21.0M/41.5M [06:15<09:53, 36.2kB/s]
     51%|#####     | 21.0M/41.5M [06:15<11:05, 32.3kB/s]
     51%|#####     | 21.0M/41.5M [06:15<10:06, 35.4kB/s]
     51%|#####     | 21.0M/41.5M [06:16<09:21, 38.2kB/s]
     51%|#####     | 21.0M/41.5M [06:16<08:47, 40.7kB/s]
     51%|#####     | 21.0M/41.5M [06:16<08:22, 42.7kB/s]
     51%|#####     | 21.0M/41.5M [06:16<08:04, 44.2kB/s]
     51%|#####     | 21.0M/41.5M [06:16<07:51, 45.4kB/s]
     51%|#####     | 21.1M/41.5M [06:17<09:51, 36.2kB/s]
     51%|#####     | 21.1M/41.5M [06:17<09:40, 36.9kB/s]
     51%|#####     | 21.1M/41.5M [06:17<08:58, 39.7kB/s]
     51%|#####     | 21.1M/41.5M [06:17<08:29, 42.0kB/s]
     51%|#####     | 21.1M/41.5M [06:17<08:08, 43.8kB/s]
     51%|#####     | 21.1M/41.5M [06:18<10:05, 35.3kB/s]
     51%|#####     | 21.1M/41.5M [06:18<11:26, 31.1kB/s]
     51%|#####     | 21.1M/41.5M [06:18<07
 :51, 45.3kB/s]
     51%|#####     | 21.1M/41.5M [06:18<07:43, 46.1kB/s]
     51%|#####     | 21.1M/41.5M [06:19<08:04, 44.1kB/s]
     51%|#####     | 21.1M/41.5M [06:19<07:51, 45.2kB/s]
     51%|#####     | 21.1M/41.5M [06:19<07:42, 46.1kB/s]
     51%|#####     | 21.2M/41.5M [06:19<07:35, 46.8kB/s]
     51%|#####1    | 21.2M/41.5M [06:20<11:12, 31.7kB/s]
     51%|#####1    | 21.2M/41.5M [06:20<09:25, 37.6kB/s]
     51%|#####1    | 21.2M/41.5M [06:20<11:07, 31.9kB/s]
     51%|#####1    | 21.2M/41.5M [06:21<14:20, 24.7kB/s]
     51%|#####1    | 21.2M/41.5M [06:21<14:23, 24.6kB/s]
     51%|#####1    | 21.2M/41.5M [06:21<12:28, 28.4kB/s]
     51%|#####1    | 21.2M/41.5M [06:22<12:59, 27.3kB/s]
     51%|#####1    | 21.2M/41.5M [06:22<11:24, 31.0kB/s]
     51%|#####1    | 21.2M/41.5M [06:22<14:22, 24.6kB/s]
     51%|#####1    | 21.2M/41.5M [06:23<11:08, 31.8kB/s]
     51%|#####1    | 21.3M/41.5M [06:23<10:11, 34.7kB/s]
     51%|#####1    | 21.3M/41.5M [06:23<09:25, 37.5kB/s]
     51%|####
 #1    | 21.3M/41.5M [06:23<08:50, 40.0kB/s]
     51%|#####1    | 21.3M/41.5M [06:23<08:23, 42.1kB/s]
     51%|#####1    | 21.3M/41.5M [06:24<06:41, 52.7kB/s]
     51%|#####1    | 21.3M/41.5M [06:24<06:49, 51.7kB/s]
     51%|#####1    | 21.3M/41.5M [06:24<06:27, 54.6kB/s]
     51%|#####1    | 21.3M/41.5M [06:24<05:38, 62.4kB/s]
     51%|#####1    | 21.3M/41.5M [06:24<06:00, 58.7kB/s]
     51%|#####1    | 21.3M/41.5M [06:24<05:51, 60.1kB/s]
     51%|#####1    | 21.4M/41.5M [06:25<04:55, 71.5kB/s]
     52%|#####1    | 21.4M/41.5M [06:25<04:25, 79.3kB/s]
     52%|#####1    | 21.4M/41.5M [06:25<06:32, 53.8kB/s]
     52%|#####1    | 21.4M/41.5M [06:25<04:34, 76.8kB/s]
     52%|#####1    | 21.4M/41.5M [06:25<04:33, 76.9kB/s]
     52%|#####1    | 21.4M/41.5M [06:26<04:31, 77.4kB/s]
     52%|#####1    | 21.5M/41.5M [06:26<04:14, 82.6kB/s]
     52%|#####1    | 21.5M/41.5M [06:26<04:35, 76.2kB/s]
     52%|#####1    | 21.5M/41.5M [06:26<04:17, 81.5kB/s]
     52%|#####1    | 21.5M/41.5M [06:26<0
 4:04, 85.7kB/s]
     52%|#####1    | 21.5M/41.5M [06:27<04:12, 83.0kB/s]
     52%|#####1    | 21.5M/41.5M [06:27<04:16, 81.7kB/s]
     52%|#####1    | 21.5M/41.5M [06:27<05:41, 61.2kB/s]
     52%|#####1    | 21.6M/41.5M [06:27<05:01, 69.3kB/s]
     52%|#####1    | 21.6M/41.5M [06:28<06:03, 57.5kB/s]
     52%|#####2    | 21.6M/41.5M [06:28<07:42, 45.1kB/s]
     52%|#####2    | 21.6M/41.5M [06:28<08:48, 39.5kB/s]
     52%|#####2    | 21.6M/41.5M [06:29<08:27, 41.1kB/s]
     52%|#####2    | 21.6M/41.5M [06:29<08:34, 40.5kB/s]
     52%|#####2    | 21.6M/41.5M [06:29<09:31, 36.5kB/s]
     52%|#####2    | 21.6M/41.5M [06:29<08:53, 39.0kB/s]
     52%|#####2    | 21.6M/41.5M [06:29<08:25, 41.2kB/s]
     52%|#####2    | 21.6M/41.5M [06:30<08:03, 43.0kB/s]
     52%|#####2    | 21.6M/41.5M [06:30<07:47, 44.5kB/s]
     52%|#####2    | 21.7M/41.5M [06:30<07:35, 45.6kB/s]
     52%|#####2    | 21.7M/41.5M [06:30<07:27, 46.5kB/s]
     52%|#####2    | 21.7M/41.5M [06:30<06:52, 50.4kB/s]
     52%|###
 ##2    | 21.7M/41.5M [06:31<07:25, 46.6kB/s]
     52%|#####2    | 21.7M/41.5M [06:31<06:50, 50.5kB/s]
     52%|#####2    | 21.7M/41.5M [06:31<06:55, 50.0kB/s]
     52%|#####2    | 21.7M/41.5M [06:31<06:58, 49.6kB/s]
     52%|#####2    | 21.7M/41.5M [06:31<09:08, 37.8kB/s]
     52%|#####2    | 21.7M/41.5M [06:32<06:06, 56.5kB/s]
     52%|#####2    | 21.7M/41.5M [06:32<05:49, 59.3kB/s]
     52%|#####2    | 21.8M/41.5M [06:32<05:41, 60.6kB/s]
     52%|#####2    | 21.8M/41.5M [06:32<05:39, 60.9kB/s]
     52%|#####2    | 21.8M/41.5M [06:33<12:35, 27.4kB/s]
     53%|#####2    | 21.8M/41.5M [06:33<05:33, 61.8kB/s]
     53%|#####2    | 21.8M/41.5M [06:33<05:24, 63.5kB/s]
     53%|#####2    | 21.8M/41.5M [06:34<06:24, 53.5kB/s]
     53%|#####2    | 21.9M/41.5M [06:34<06:00, 57.1kB/s]
     53%|#####2    | 21.9M/41.5M [06:34<06:10, 55.5kB/s]
     53%|#####2    | 21.9M/41.5M [06:34<06:20, 54.1kB/s]
     53%|#####2    | 21.9M/41.5M [06:35<06:29, 52.8kB/s]
     53%|#####2    | 21.9M/41.5M [06:35<
 06:37, 51.8kB/s]
     53%|#####2    | 21.9M/41.5M [06:35<05:47, 59.1kB/s]
     53%|#####2    | 21.9M/41.5M [06:35<06:03, 56.4kB/s]
     53%|#####2    | 21.9M/41.5M [06:35<06:17, 54.3kB/s]
     53%|#####2    | 21.9M/41.5M [06:35<06:28, 52.8kB/s]
     53%|#####2    | 21.9M/41.5M [06:36<06:03, 56.3kB/s]
     53%|#####2    | 21.9M/41.5M [06:36<08:50, 38.6kB/s]
     53%|#####2    | 22.0M/41.5M [06:36<05:46, 59.1kB/s]
     53%|#####2    | 22.0M/41.5M [06:36<06:01, 56.6kB/s]
     53%|#####3    | 22.0M/41.5M [06:37<05:31, 61.6kB/s]
     53%|#####3    | 22.0M/41.5M [06:37<05:20, 63.8kB/s]
     53%|#####3    | 22.0M/41.5M [06:37<05:05, 66.8kB/s]
     53%|#####3    | 22.0M/41.5M [06:37<05:29, 61.9kB/s]
     53%|#####3    | 22.0M/41.5M [06:37<05:16, 64.4kB/s]
     53%|#####3    | 22.0M/41.5M [06:37<06:18, 53.9kB/s]
     53%|#####3    | 22.1M/41.5M [06:38<05:07, 66.2kB/s]
     53%|#####3    | 22.1M/41.5M [06:38<05:00, 67.9kB/s]
     53%|#####3    | 22.1M/41.5M [06:38<05:29, 61.9kB/s]
     53%|##
 ###3    | 22.1M/41.5M [06:38<04:39, 72.8kB/s]
     53%|#####3    | 22.1M/41.5M [06:38<04:32, 74.6kB/s]
     53%|#####3    | 22.1M/41.5M [06:38<04:42, 72.0kB/s]
     53%|#####3    | 22.1M/41.5M [06:38<06:28, 52.3kB/s]
     53%|#####3    | 22.1M/41.5M [06:39<05:51, 57.8kB/s]
     53%|#####3    | 22.1M/41.5M [06:39<07:36, 44.5kB/s]
     53%|#####3    | 22.1M/41.5M [06:39<06:10, 54.8kB/s]
     53%|#####3    | 22.1M/41.5M [06:39<06:21, 53.2kB/s]
     53%|#####3    | 22.2M/41.5M [06:39<06:30, 51.9kB/s]
     53%|#####3    | 22.2M/41.5M [06:40<06:37, 51.0kB/s]
     53%|#####3    | 22.2M/41.5M [06:40<06:42, 50.4kB/s]
     53%|#####3    | 22.2M/41.5M [06:40<06:46, 49.9kB/s]
     53%|#####3    | 22.2M/41.5M [06:40<06:49, 49.5kB/s]
     53%|#####3    | 22.2M/41.5M [06:40<06:51, 49.2kB/s]
     54%|#####3    | 22.2M/41.5M [06:40<05:18, 63.5kB/s]
     54%|#####3    | 22.2M/41.5M [06:41<05:42, 59.1kB/s]
     54%|#####3    | 22.2M/41.5M [06:41<04:46, 70.5kB/s]
     54%|#####3    | 22.2M/41.5M [06:41
 <05:15, 64.0kB/s]
     54%|#####3    | 22.3M/41.5M [06:41<04:32, 73.9kB/s]
     54%|#####3    | 22.3M/41.5M [06:41<05:23, 62.3kB/s]
     54%|#####3    | 22.3M/41.5M [06:42<04:43, 71.0kB/s]
     54%|#####3    | 22.3M/41.5M [06:42<04:18, 77.8kB/s]
     54%|#####3    | 22.3M/41.5M [06:42<04:17, 77.9kB/s]
     54%|#####3    | 22.3M/41.5M [06:42<04:27, 75.0kB/s]
     54%|#####3    | 22.3M/41.5M [06:42<04:57, 67.4kB/s]
     54%|#####3    | 22.4M/41.5M [06:42<04:23, 76.1kB/s]
     54%|#####3    | 22.4M/41.5M [06:43<04:03, 82.4kB/s]
     54%|#####3    | 22.4M/41.5M [06:43<04:59, 67.0kB/s]
     54%|#####3    | 22.4M/41.5M [06:43<05:40, 58.8kB/s]
     54%|#####4    | 22.4M/41.5M [06:43<05:11, 64.3kB/s]
     54%|#####4    | 22.4M/41.5M [06:44<06:11, 53.8kB/s]
     54%|#####4    | 22.4M/41.5M [06:44<06:20, 52.5kB/s]
     54%|#####4    | 22.4M/41.5M [06:44<06:27, 51.6kB/s]
     54%|#####4    | 22.4M/41.5M [06:44<06:33, 50.8kB/s]
     54%|#####4    | 22.4M/41.5M [06:44<06:37, 50.2kB/s]
     54%|#
 ####4    | 22.5M/41.5M [06:44<06:41, 49.8kB/s]
     54%|#####4    | 22.5M/41.5M [06:45<06:43, 49.4kB/s]
     54%|#####4    | 22.5M/41.5M [06:45<05:14, 63.4kB/s]
     54%|#####4    | 22.5M/41.5M [06:45<05:37, 59.1kB/s]
     54%|#####4    | 22.5M/41.5M [06:45<04:42, 70.4kB/s]
     54%|#####4    | 22.5M/41.5M [06:45<04:13, 78.4kB/s]
     54%|#####4    | 22.5M/41.5M [06:46<04:45, 69.5kB/s]
     54%|#####4    | 22.5M/41.5M [06:46<04:15, 77.8kB/s]
     54%|#####4    | 22.6M/41.5M [06:46<03:57, 83.6kB/s]
     54%|#####4    | 22.6M/41.5M [06:46<05:28, 60.3kB/s]
     54%|#####4    | 22.6M/41.5M [06:46<03:24, 96.7kB/s]
     55%|#####4    | 22.6M/41.5M [06:47<03:38, 90.7kB/s]
     55%|#####4    | 22.6M/41.5M [06:47<03:34, 92.4kB/s]
     55%|#####4    | 22.6M/41.5M [06:47<03:15, 101kB/s] 
     55%|#####4    | 22.7M/41.5M [06:47<03:17, 99.8kB/s]
     55%|#####4    | 22.7M/41.5M [06:47<03:19, 99.1kB/s]
     55%|#####4    | 22.7M/41.5M [06:47<03:19, 98.6kB/s]
     55%|#####4    | 22.7M/41.5M [06:4
 8<04:05, 80.1kB/s]
     55%|#####4    | 22.7M/41.5M [06:48<04:14, 77.2kB/s]
     55%|#####4    | 22.8M/41.5M [06:48<03:43, 87.7kB/s]
     55%|#####4    | 22.8M/41.5M [06:49<04:09, 78.7kB/s]
     55%|#####4    | 22.8M/41.5M [06:49<04:17, 76.2kB/s]
     55%|#####4    | 22.8M/41.5M [06:49<04:15, 76.7kB/s]
     55%|#####4    | 22.8M/41.5M [06:49<03:58, 82.2kB/s]
     55%|#####5    | 22.8M/41.5M [06:49<04:10, 78.0kB/s]
     55%|#####5    | 22.8M/41.5M [06:50<04:56, 65.8kB/s]
     55%|#####5    | 22.9M/41.5M [06:50<03:52, 84.1kB/s]
     55%|#####5    | 22.9M/41.5M [06:50<04:38, 70.1kB/s]
     55%|#####5    | 22.9M/41.5M [06:50<04:16, 76.1kB/s]
     55%|#####5    | 22.9M/41.5M [06:50<04:00, 81.2kB/s]
     55%|#####5    | 22.9M/41.5M [06:51<03:48, 85.3kB/s]
     55%|#####5    | 22.9M/41.5M [06:51<03:39, 88.5kB/s]
     55%|#####5    | 23.0M/41.5M [06:51<03:33, 90.9kB/s]
     55%|#####5    | 23.0M/41.5M [06:51<03:29, 92.7kB/s]
     55%|#####5    | 23.0M/41.5M [06:51<04:25, 73.0kB/s]
     55%|
 #####5    | 23.0M/41.5M [06:52<04:05, 78.9kB/s]
     55%|#####5    | 23.0M/41.5M [06:52<03:51, 83.6kB/s]
     56%|#####5    | 23.0M/41.5M [06:52<04:41, 68.8kB/s]
     56%|#####5    | 23.1M/41.5M [06:52<03:43, 86.6kB/s]
     56%|#####5    | 23.1M/41.5M [06:52<03:36, 89.2kB/s]
     56%|#####5    | 23.1M/41.5M [06:53<05:21, 59.9kB/s]
     56%|#####5    | 23.1M/41.5M [06:53<04:46, 67.3kB/s]
     56%|#####5    | 23.1M/41.5M [06:53<04:20, 73.9kB/s]
     56%|#####5    | 23.1M/41.5M [06:53<04:02, 79.5kB/s]
     56%|#####5    | 23.2M/41.5M [06:54<03:48, 84.0kB/s]
     56%|#####5    | 23.2M/41.5M [06:54<03:39, 87.6kB/s]
     56%|#####5    | 23.2M/41.5M [06:54<03:32, 90.2kB/s]
     56%|#####5    | 23.2M/41.5M [06:54<04:26, 71.9kB/s]
     56%|#####5    | 23.2M/41.5M [06:54<03:33, 89.6kB/s]
     56%|#####6    | 23.2M/41.5M [06:55<03:28, 91.6kB/s]
     56%|#####6    | 23.3M/41.5M [06:55<03:25, 93.1kB/s]
     56%|#####6    | 23.3M/41.5M [06:55<04:32, 70.0kB/s]
     56%|#####6    | 23.3M/41.5M [06:
 55<04:52, 65.2kB/s]
     56%|#####6    | 23.3M/41.5M [06:56<03:50, 82.7kB/s]
     56%|#####6    | 23.3M/41.5M [06:56<03:40, 86.2kB/s]
     56%|#####6    | 23.3M/41.5M [06:56<03:34, 88.9kB/s]
     56%|#####6    | 23.4M/41.5M [06:56<05:06, 62.1kB/s]
     56%|#####6    | 23.4M/41.5M [06:57<04:48, 65.8kB/s]
     56%|#####6    | 23.4M/41.5M [06:57<04:35, 68.9kB/s]
     56%|#####6    | 23.4M/41.5M [06:57<04:53, 64.6kB/s]
     56%|#####6    | 23.4M/41.5M [06:57<05:11, 60.9kB/s]
     56%|#####6    | 23.4M/41.5M [06:58<04:30, 69.9kB/s]
     56%|#####6    | 23.4M/41.5M [06:58<04:54, 64.4kB/s]
     57%|#####6    | 23.5M/41.5M [06:58<04:17, 73.3kB/s]
     57%|#####6    | 23.5M/41.5M [06:58<04:44, 66.4kB/s]
     57%|#####6    | 23.5M/41.5M [06:58<04:10, 75.3kB/s]
     57%|#####6    | 23.5M/41.5M [06:58<03:51, 81.6kB/s]
     57%|#####6    | 23.5M/41.5M [06:59<03:38, 86.2kB/s]
     57%|#####6    | 23.5M/41.5M [06:59<03:30, 89.5kB/s]
     57%|#####6    | 23.5M/41.5M [06:59<03:24, 91.8kB/s]
     57%
 |#####6    | 23.6M/41.5M [06:59<03:21, 93.5kB/s]
     57%|#####6    | 23.6M/41.5M [06:59<03:18, 94.6kB/s]
     57%|#####6    | 23.6M/41.5M [06:59<03:16, 95.4kB/s]
     57%|#####6    | 23.6M/41.5M [07:00<03:15, 96.0kB/s]
     57%|#####6    | 23.6M/41.5M [07:00<03:14, 96.4kB/s]
     57%|#####6    | 23.6M/41.5M [07:00<03:29, 89.6kB/s]
     57%|#####7    | 23.7M/41.5M [07:00<03:47, 82.3kB/s]
     57%|#####7    | 23.7M/41.5M [07:00<02:40, 116kB/s] 
     57%|#####7    | 23.7M/41.5M [07:01<02:47, 111kB/s]
     57%|#####7    | 23.7M/41.5M [07:01<02:53, 107kB/s]
     57%|#####7    | 23.7M/41.5M [07:01<03:48, 81.3kB/s]
     57%|#####7    | 23.8M/41.5M [07:02<05:33, 55.8kB/s]
     57%|#####7    | 23.8M/41.5M [07:02<03:07, 99.0kB/s]
     57%|#####7    | 23.8M/41.5M [07:02<03:07, 98.6kB/s]
     57%|#####7    | 23.8M/41.5M [07:02<03:08, 98.3kB/s]
     57%|#####7    | 23.8M/41.5M [07:02<03:08, 98.1kB/s]
     58%|#####7    | 23.9M/41.5M [07:03<03:57, 77.7kB/s]
     58%|#####7    | 23.9M/41.5M [07:0
 3<03:18, 93.2kB/s]
     58%|#####7    | 23.9M/41.5M [07:03<03:15, 94.2kB/s]
     58%|#####7    | 23.9M/41.5M [07:03<04:54, 62.5kB/s]
     58%|#####7    | 23.9M/41.5M [07:04<04:25, 69.3kB/s]
     58%|#####7    | 23.9M/41.5M [07:04<04:57, 61.8kB/s]
     58%|#####7    | 24.0M/41.5M [07:04<05:20, 57.4kB/s]
     58%|#####7    | 24.0M/41.5M [07:05<08:43, 35.1kB/s]
     58%|#####7    | 24.0M/41.5M [07:05<08:28, 36.1kB/s]
     58%|#####7    | 24.0M/41.5M [07:05<09:06, 33.6kB/s]
     58%|#####7    | 24.0M/41.5M [07:06<08:42, 35.1kB/s]
     58%|#####7    | 24.0M/41.5M [07:06<08:05, 37.8kB/s]
     58%|#####7    | 24.0M/41.5M [07:06<08:58, 34.1kB/s]
     58%|#####7    | 24.0M/41.5M [07:06<08:13, 37.1kB/s]
     58%|#####7    | 24.0M/41.5M [07:07<11:32, 26.5kB/s]
     58%|#####7    | 24.0M/41.5M [07:07<07:46, 39.2kB/s]
     58%|#####7    | 24.0M/41.5M [07:07<07:24, 41.1kB/s]
     58%|#####7    | 24.1M/41.5M [07:07<07:06, 42.8kB/s]
     58%|#####7    | 24.1M/41.5M [07:08<06:52, 44.3kB/s]
     58%|
 #####8    | 24.1M/41.5M [07:08<06:42, 45.4kB/s]
     58%|#####8    | 24.1M/41.5M [07:08<06:34, 46.3kB/s]
     58%|#####8    | 24.1M/41.5M [07:08<06:57, 43.7kB/s]
     58%|#####8    | 24.1M/41.5M [07:09<09:54, 30.7kB/s]
     58%|#####8    | 24.1M/41.5M [07:09<11:36, 26.2kB/s]
     58%|#####8    | 24.1M/41.5M [07:09<10:01, 30.3kB/s]
     58%|#####8    | 24.1M/41.5M [07:09<10:44, 28.2kB/s]
     58%|#####8    | 24.1M/41.5M [07:10<11:15, 27.0kB/s]
     58%|#####8    | 24.1M/41.5M [07:10<11:37, 26.1kB/s]
     58%|#####8    | 24.1M/41.5M [07:11<13:43, 22.1kB/s]
     58%|#####8    | 24.1M/41.5M [07:11<11:28, 26.4kB/s]
     58%|#####8    | 24.2M/41.5M [07:11<11:46, 25.7kB/s]
     58%|#####8    | 24.2M/41.5M [07:11<10:06, 30.0kB/s]
     58%|#####8    | 24.2M/41.5M [07:12<10:48, 28.0kB/s]
     58%|#####8    | 24.2M/41.5M [07:12<09:25, 32.1kB/s]
     58%|#####8    | 24.2M/41.5M [07:12<10:19, 29.3kB/s]
     58%|#####8    | 24.2M/41.5M [07:12<09:05, 33.3kB/s]
     58%|#####8    | 24.2M/41.5M [07:
 13<10:04, 30.0kB/s]
     58%|#####8    | 24.2M/41.5M [07:13<06:51, 44.0kB/s]
     58%|#####8    | 24.2M/41.5M [07:13<06:41, 45.1kB/s]
     58%|#####8    | 24.2M/41.5M [07:13<06:33, 46.0kB/s]
     58%|#####8    | 24.2M/41.5M [07:13<06:27, 46.7kB/s]
     58%|#####8    | 24.2M/41.5M [07:13<06:22, 47.2kB/s]
     58%|#####8    | 24.3M/41.5M [07:14<06:19, 47.6kB/s]
     58%|#####8    | 24.3M/41.5M [07:14<06:16, 47.9kB/s]
     59%|#####8    | 24.3M/41.5M [07:14<08:03, 37.3kB/s]
     59%|#####8    | 24.3M/41.5M [07:14<05:47, 51.9kB/s]
     59%|#####8    | 24.3M/41.5M [07:14<05:53, 51.1kB/s]
     59%|#####8    | 24.3M/41.5M [07:15<05:57, 50.4kB/s]
     59%|#####8    | 24.3M/41.5M [07:15<06:00, 49.9kB/s]
     59%|#####8    | 24.3M/41.5M [07:15<04:43, 63.4kB/s]
     59%|#####8    | 24.3M/41.5M [07:15<05:04, 59.1kB/s]
     59%|#####8    | 24.3M/41.5M [07:15<05:20, 56.1kB/s]
     59%|#####8    | 24.4M/41.5M [07:16<07:12, 41.6kB/s]
     59%|#####8    | 24.4M/41.5M [07:16<06:54, 43.4kB/s]
     59%
 |#####8    | 24.4M/41.5M [07:16<06:34, 45.5kB/s]
     59%|#####8    | 24.4M/41.5M [07:16<06:07, 48.8kB/s]
     59%|#####8    | 24.4M/41.5M [07:17<06:25, 46.5kB/s]
     59%|#####8    | 24.4M/41.5M [07:17<06:21, 47.0kB/s]
     59%|#####8    | 24.4M/41.5M [07:17<06:17, 47.4kB/s]
     59%|#####8    | 24.4M/41.5M [07:18<13:05, 22.8kB/s]
     59%|#####8    | 24.4M/41.5M [07:18<07:50, 38.0kB/s]
     59%|#####8    | 24.5M/41.5M [07:18<07:29, 39.7kB/s]
     59%|#####8    | 24.5M/41.5M [07:19<07:10, 41.4kB/s]
     59%|#####8    | 24.5M/41.5M [07:19<06:55, 43.0kB/s]
     59%|#####8    | 24.5M/41.5M [07:19<06:42, 44.3kB/s]
     59%|#####9    | 24.5M/41.5M [07:19<06:32, 45.4kB/s]
     59%|#####9    | 24.5M/41.5M [07:19<08:06, 36.6kB/s]
     59%|#####9    | 24.5M/41.5M [07:20<07:13, 41.1kB/s]
     59%|#####9    | 24.5M/41.5M [07:20<06:56, 42.7kB/s]
     59%|#####9    | 24.5M/41.5M [07:20<06:43, 44.1kB/s]
     59%|#####9    | 24.5M/41.5M [07:20<06:33, 45.2kB/s]
     59%|#####9    | 24.5M/41.5M [07
 :21<07:40, 38.6kB/s]
     59%|#####9    | 24.6M/41.5M [07:21<11:53, 24.9kB/s]
     59%|#####9    | 24.6M/41.5M [07:22<08:57, 33.0kB/s]
     59%|#####9    | 24.6M/41.5M [07:22<07:02, 42.0kB/s]
     59%|#####9    | 24.6M/41.5M [07:22<06:50, 43.1kB/s]
     59%|#####9    | 24.6M/41.5M [07:23<07:57, 37.0kB/s]
     59%|#####9    | 24.6M/41.5M [07:23<07:31, 39.2kB/s]
     59%|#####9    | 24.6M/41.5M [07:23<08:39, 34.0kB/s]
     59%|#####9    | 24.6M/41.5M [07:23<07:58, 36.9kB/s]
     59%|#####9    | 24.6M/41.5M [07:23<05:51, 50.3kB/s]
     59%|#####9    | 24.7M/41.5M [07:24<05:53, 49.9kB/s]
     59%|#####9    | 24.7M/41.5M [07:24<05:55, 49.6kB/s]
     59%|#####9    | 24.7M/41.5M [07:24<07:33, 38.9kB/s]
     60%|#####9    | 24.7M/41.5M [07:24<04:36, 63.6kB/s]
     60%|#####9    | 24.7M/41.5M [07:24<04:53, 59.9kB/s]
     60%|#####9    | 24.7M/41.5M [07:25<05:08, 57.0kB/s]
     60%|#####9    | 24.7M/41.5M [07:25<05:21, 54.7kB/s]
     60%|#####9    | 24.7M/41.5M [07:25<05:31, 53.0kB/s]
     60
 %|#####9    | 24.7M/41.5M [07:25<04:27, 65.7kB/s]
     60%|#####9    | 24.8M/41.5M [07:25<04:48, 60.7kB/s]
     60%|#####9    | 24.8M/41.5M [07:25<04:05, 71.5kB/s]
     60%|#####9    | 24.8M/41.5M [07:26<04:30, 64.7kB/s]
     60%|#####9    | 24.8M/41.5M [07:26<03:55, 74.4kB/s]
     60%|#####9    | 24.8M/41.5M [07:26<03:35, 81.2kB/s]
     60%|#####9    | 24.8M/41.5M [07:26<04:04, 71.5kB/s]
     60%|#####9    | 24.8M/41.5M [07:26<03:40, 79.2kB/s]
     60%|#####9    | 24.8M/41.5M [07:26<03:26, 84.6kB/s]
     60%|#####9    | 24.9M/41.5M [07:27<04:16, 68.0kB/s]
     60%|#####9    | 24.9M/41.5M [07:27<04:58, 58.4kB/s]
     60%|######    | 24.9M/41.5M [07:27<03:51, 75.2kB/s]
     60%|######    | 24.9M/41.5M [07:28<04:36, 62.9kB/s]
     60%|######    | 24.9M/41.5M [07:28<04:34, 63.3kB/s]
     60%|######    | 24.9M/41.5M [07:28<04:49, 60.0kB/s]
     60%|######    | 24.9M/41.5M [07:28<04:33, 63.3kB/s]
     60%|######    | 25.0M/41.5M [07:28<04:24, 65.6kB/s]
     60%|######    | 25.0M/41.5M [0
 7:29<04:13, 68.2kB/s]
     60%|######    | 25.0M/41.5M [07:29<04:11, 68.8kB/s]
     60%|######    | 25.0M/41.5M [07:29<05:48, 49.6kB/s]
     60%|######    | 25.0M/41.5M [07:30<06:07, 47.0kB/s]
     60%|######    | 25.0M/41.5M [07:30<04:45, 60.5kB/s]
     60%|######    | 25.0M/41.5M [07:30<06:09, 46.7kB/s]
     60%|######    | 25.0M/41.5M [07:30<06:06, 47.1kB/s]
     60%|######    | 25.0M/41.5M [07:30<06:03, 47.5kB/s]
     60%|######    | 25.1M/41.5M [07:30<06:00, 47.8kB/s]
     60%|######    | 25.1M/41.5M [07:31<05:58, 48.0kB/s]
     60%|######    | 25.1M/41.5M [07:31<07:12, 39.7kB/s]
     61%|######    | 25.1M/41.5M [07:31<04:40, 61.2kB/s]
     61%|######    | 25.1M/41.5M [07:32<04:54, 58.4kB/s]
     61%|######    | 25.1M/41.5M [07:32<05:06, 56.0kB/s]
     61%|######    | 25.1M/41.5M [07:32<06:38, 43.1kB/s]
     61%|######    | 25.1M/41.5M [07:32<05:10, 55.3kB/s]
     61%|######    | 25.1M/41.5M [07:32<05:18, 53.7kB/s]
     61%|######    | 25.2M/41.5M [07:33<05:26, 52.4kB/s]
     6
 1%|######    | 25.2M/41.5M [07:33<04:42, 60.5kB/s]
     61%|######    | 25.2M/41.5M [07:33<04:39, 61.3kB/s]
     61%|######    | 25.2M/41.5M [07:33<06:21, 44.8kB/s]
     61%|######    | 25.2M/41.5M [07:33<05:33, 51.3kB/s]
     61%|######    | 25.2M/41.5M [07:34<05:06, 55.6kB/s]
     61%|######    | 25.2M/41.5M [07:34<05:52, 48.4kB/s]
     61%|######    | 25.2M/41.5M [07:34<05:37, 50.5kB/s]
     61%|######    | 25.3M/41.5M [07:35<05:13, 54.2kB/s]
     61%|######    | 25.3M/41.5M [07:35<05:21, 52.9kB/s]
     61%|######    | 25.3M/41.5M [07:35<04:57, 57.2kB/s]
     61%|######    | 25.3M/41.5M [07:35<06:38, 42.7kB/s]
     61%|######    | 25.3M/41.5M [07:35<05:32, 51.1kB/s]
     61%|######    | 25.3M/41.5M [07:36<05:35, 50.5kB/s]
     61%|######1   | 25.3M/41.5M [07:36<05:38, 50.1kB/s]
     61%|######1   | 25.3M/41.5M [07:36<04:57, 56.9kB/s]
     61%|######1   | 25.3M/41.5M [07:36<04:39, 60.7kB/s]
     61%|######1   | 25.3M/41.5M [07:36<06:21, 44.4kB/s]
     61%|######1   | 25.4M/41.5M [
 07:37<06:07, 46.1kB/s]
     61%|######1   | 25.4M/41.5M [07:37<06:02, 46.6kB/s]
     61%|######1   | 25.4M/41.5M [07:37<05:58, 47.1kB/s]
     61%|######1   | 25.4M/41.5M [07:37<04:42, 59.8kB/s]
     61%|######1   | 25.4M/41.5M [07:37<04:56, 56.8kB/s]
     61%|######1   | 25.4M/41.5M [07:38<05:08, 54.6kB/s]
     61%|######1   | 25.4M/41.5M [07:38<04:56, 56.8kB/s]
     61%|######1   | 25.4M/41.5M [07:38<04:21, 64.3kB/s]
     61%|######1   | 25.4M/41.5M [07:38<04:21, 64.3kB/s]
     61%|######1   | 25.5M/41.5M [07:38<03:46, 74.4kB/s]
     61%|######1   | 25.5M/41.5M [07:38<03:41, 75.8kB/s]
     61%|######1   | 25.5M/41.5M [07:39<03:49, 73.1kB/s]
     61%|######1   | 25.5M/41.5M [07:39<03:28, 80.4kB/s]
     61%|######1   | 25.5M/41.5M [07:39<03:15, 85.6kB/s]
     62%|######1   | 25.5M/41.5M [07:39<03:07, 89.2kB/s]
     62%|######1   | 25.5M/41.5M [07:39<03:02, 91.6kB/s]
     62%|######1   | 25.6M/41.5M [07:39<02:58, 93.4kB/s]
     62%|######1   | 25.6M/41.5M [07:40<03:23, 82.1kB/s]
     
 62%|######1   | 25.6M/41.5M [07:40<03:56, 70.6kB/s]
     62%|######1   | 25.6M/41.5M [07:40<03:11, 86.9kB/s]
     62%|######1   | 25.6M/41.5M [07:40<03:17, 84.4kB/s]
     62%|######1   | 25.6M/41.5M [07:41<03:55, 70.5kB/s]
     62%|######1   | 25.7M/41.5M [07:41<03:59, 69.4kB/s]
     62%|######1   | 25.7M/41.5M [07:41<04:18, 64.3kB/s]
     62%|######1   | 25.7M/41.5M [07:41<04:35, 60.2kB/s]
     62%|######1   | 25.7M/41.5M [07:41<03:55, 70.3kB/s]
     62%|######1   | 25.7M/41.5M [07:42<04:17, 64.2kB/s]
     62%|######1   | 25.7M/41.5M [07:42<05:04, 54.3kB/s]
     62%|######2   | 25.7M/41.5M [07:42<05:16, 52.2kB/s]
     62%|######2   | 25.7M/41.5M [07:42<04:27, 61.7kB/s]
     62%|######2   | 25.8M/41.5M [07:43<04:41, 58.6kB/s]
     62%|######2   | 25.8M/41.5M [07:43<04:35, 59.8kB/s]
     62%|######2   | 25.8M/41.5M [07:43<04:11, 65.5kB/s]
     62%|######2   | 25.8M/41.5M [07:43<04:11, 65.4kB/s]
     62%|######2   | 25.8M/41.5M [07:43<03:56, 69.6kB/s]
     62%|######2   | 25.8M/41.5M 
 [07:43<04:17, 63.8kB/s]
     62%|######2   | 25.8M/41.5M [07:44<05:35, 49.0kB/s]
     62%|######2   | 25.8M/41.5M [07:44<05:53, 46.4kB/s]
     62%|######2   | 25.9M/41.5M [07:44<04:53, 55.8kB/s]
     62%|######2   | 25.9M/41.5M [07:45<04:27, 61.2kB/s]
     62%|######2   | 25.9M/41.5M [07:45<04:39, 58.5kB/s]
     62%|######2   | 25.9M/41.5M [07:45<04:51, 56.2kB/s]
     62%|######2   | 25.9M/41.5M [07:45<04:43, 57.8kB/s]
     62%|######2   | 25.9M/41.5M [07:45<04:55, 55.2kB/s]
     62%|######2   | 25.9M/41.5M [07:46<04:19, 62.9kB/s]
     62%|######2   | 25.9M/41.5M [07:46<04:18, 63.1kB/s]
     62%|######2   | 25.9M/41.5M [07:46<04:37, 58.8kB/s]
     63%|######2   | 25.9M/41.5M [07:46<05:00, 54.2kB/s]
     63%|######2   | 26.0M/41.5M [07:46<03:44, 72.5kB/s]
     63%|######2   | 26.0M/41.5M [07:46<03:49, 70.8kB/s]
     63%|######2   | 26.0M/41.5M [07:47<04:11, 64.7kB/s]
     63%|######2   | 26.0M/41.5M [07:47<04:56, 54.8kB/s]
     63%|######2   | 26.0M/41.5M [07:47<04:11, 64.4kB/s]
    
  63%|######2   | 26.0M/41.5M [07:47<04:12, 64.2kB/s]
     63%|######2   | 26.0M/41.5M [07:48<05:34, 48.4kB/s]
     63%|######2   | 26.0M/41.5M [07:48<05:34, 48.4kB/s]
     63%|######2   | 26.1M/41.5M [07:48<05:33, 48.5kB/s]
     63%|######2   | 26.1M/41.5M [07:48<05:33, 48.5kB/s]
     63%|######2   | 26.1M/41.5M [07:49<07:02, 38.3kB/s]
     63%|######2   | 26.1M/41.5M [07:49<06:23, 42.2kB/s]
     63%|######2   | 26.1M/41.5M [07:49<06:10, 43.5kB/s]
     63%|######2   | 26.1M/41.5M [07:49<05:04, 52.9kB/s]
     63%|######2   | 26.1M/41.5M [07:50<07:19, 36.6kB/s]
     63%|######2   | 26.1M/41.5M [07:50<05:49, 46.1kB/s]
     63%|######3   | 26.1M/41.5M [07:50<05:45, 46.6kB/s]
     63%|######3   | 26.1M/41.5M [07:50<05:41, 47.1kB/s]
     63%|######3   | 26.2M/41.5M [07:51<05:38, 47.5kB/s]
     63%|######3   | 26.2M/41.5M [07:51<07:04, 37.9kB/s]
     63%|######3   | 26.2M/41.5M [07:51<06:23, 41.9kB/s]
     63%|######3   | 26.2M/41.5M [07:51<06:10, 43.3kB/s]
     63%|######3   | 26.2M/41.5M
  [07:52<07:22, 36.2kB/s]
     63%|######3   | 26.2M/41.5M [07:52<05:26, 49.1kB/s]
     63%|######3   | 26.2M/41.5M [07:52<05:26, 49.0kB/s]
     63%|######3   | 26.2M/41.5M [07:52<06:48, 39.2kB/s]
     63%|######3   | 26.2M/41.5M [07:53<06:27, 41.3kB/s]
     63%|######3   | 26.2M/41.5M [07:53<06:11, 43.0kB/s]
     63%|######3   | 26.2M/41.5M [07:53<05:59, 44.5kB/s]
     63%|######3   | 26.3M/41.5M [07:53<07:24, 35.9kB/s]
     63%|######3   | 26.3M/41.5M [07:54<06:31, 40.8kB/s]
     63%|######3   | 26.3M/41.5M [07:54<06:15, 42.4kB/s]
     63%|######3   | 26.3M/41.5M [07:54<06:03, 43.9kB/s]
     63%|######3   | 26.3M/41.5M [07:54<05:53, 45.1kB/s]
     63%|######3   | 26.3M/41.5M [07:54<05:45, 46.0kB/s]
     63%|######3   | 26.3M/41.5M [07:54<05:40, 46.7kB/s]
     63%|######3   | 26.3M/41.5M [07:55<05:36, 47.3kB/s]
     63%|######3   | 26.3M/41.5M [07:55<05:33, 47.7kB/s]
     63%|######3   | 26.3M/41.5M [07:55<05:31, 48.0kB/s]
     63%|######3   | 26.3M/41.5M [07:55<05:30, 48.1kB/s]
   
   64%|######3   | 26.4M/41.5M [07:55<05:28, 48.3kB/s]
     64%|######3   | 26.4M/41.5M [07:55<05:27, 48.4kB/s]
     64%|######3   | 26.4M/41.5M [07:56<05:27, 48.5kB/s]
     64%|######3   | 26.4M/41.5M [07:56<04:11, 63.0kB/s]
     64%|######3   | 26.4M/41.5M [07:56<04:29, 58.7kB/s]
     64%|######3   | 26.4M/41.5M [07:56<04:43, 55.7kB/s]
     64%|######3   | 26.4M/41.5M [07:56<03:51, 68.2kB/s]
     64%|######3   | 26.4M/41.5M [07:56<04:13, 62.3kB/s]
     64%|######3   | 26.4M/41.5M [07:57<03:36, 72.8kB/s]
     64%|######3   | 26.4M/41.5M [07:57<04:00, 65.6kB/s]
     64%|######3   | 26.5M/41.5M [07:57<03:44, 70.1kB/s]
     64%|######3   | 26.5M/41.5M [07:57<04:59, 52.5kB/s]
     64%|######3   | 26.5M/41.5M [07:57<03:26, 76.0kB/s]
     64%|######3   | 26.5M/41.5M [07:58<03:24, 76.7kB/s]
     64%|######3   | 26.5M/41.5M [07:58<03:49, 68.5kB/s]
     64%|######3   | 26.5M/41.5M [07:58<03:28, 75.4kB/s]
     64%|######4   | 26.6M/41.5M [07:58<03:13, 80.9kB/s]
     64%|######4   | 26.6M/41.5
 M [07:58<03:03, 85.2kB/s]
     64%|######4   | 26.6M/41.5M [07:59<02:56, 88.5kB/s]
     64%|######4   | 26.6M/41.5M [07:59<03:03, 85.2kB/s]
     64%|######4   | 26.6M/41.5M [07:59<02:56, 88.6kB/s]
     64%|######4   | 26.6M/41.5M [07:59<04:16, 60.8kB/s]
     64%|######4   | 26.7M/41.5M [08:00<02:54, 89.1kB/s]
     64%|######4   | 26.7M/41.5M [08:00<02:50, 91.0kB/s]
     64%|######4   | 26.7M/41.5M [08:00<03:29, 74.1kB/s]
     64%|######4   | 26.7M/41.5M [08:00<02:51, 90.1kB/s]
     64%|######4   | 26.7M/41.5M [08:01<03:29, 73.9kB/s]
     64%|######4   | 26.8M/41.5M [08:01<02:52, 89.5kB/s]
     65%|######4   | 26.8M/41.5M [08:01<04:08, 62.0kB/s]
     65%|######4   | 26.8M/41.5M [08:01<03:18, 77.6kB/s]
     65%|######4   | 26.8M/41.5M [08:02<03:17, 77.8kB/s]
     65%|######4   | 26.8M/41.5M [08:02<03:38, 70.3kB/s]
     65%|######4   | 26.8M/41.5M [08:02<03:21, 76.1kB/s]
     65%|######4   | 26.9M/41.5M [08:02<03:09, 81.1kB/s]
     65%|######4   | 26.9M/41.5M [08:02<03:00, 85.1kB/s]
  
    65%|######4   | 26.9M/41.5M [08:03<02:53, 88.3kB/s]
     65%|######4   | 26.9M/41.5M [08:03<03:11, 80.0kB/s]
     65%|######4   | 26.9M/41.5M [08:03<03:00, 84.5kB/s]
     65%|######4   | 26.9M/41.5M [08:03<03:17, 77.2kB/s]
     65%|######4   | 27.0M/41.5M [08:03<03:05, 82.3kB/s]
     65%|######5   | 27.0M/41.5M [08:04<02:56, 86.3kB/s]
     65%|######5   | 27.0M/41.5M [08:04<02:50, 89.3kB/s]
     65%|######5   | 27.0M/41.5M [08:04<03:32, 71.5kB/s]
     65%|######5   | 27.0M/41.5M [08:04<02:40, 94.8kB/s]
     65%|######5   | 27.0M/41.5M [08:05<02:49, 89.5kB/s]
     65%|######5   | 27.1M/41.5M [08:05<03:29, 72.4kB/s]
     65%|######5   | 27.1M/41.5M [08:05<02:39, 94.6kB/s]
     65%|######5   | 27.1M/41.5M [08:05<03:08, 80.0kB/s]
     65%|######5   | 27.1M/41.5M [08:06<03:52, 64.8kB/s]
     65%|######5   | 27.1M/41.5M [08:06<03:19, 75.4kB/s]
     65%|######5   | 27.1M/41.5M [08:06<03:06, 80.6kB/s]
     65%|######5   | 27.2M/41.5M [08:06<04:26, 56.3kB/s]
     65%|######5   | 27.2M/41.
 5M [08:07<05:27, 45.9kB/s]
     65%|######5   | 27.2M/41.5M [08:07<05:23, 46.4kB/s]
     66%|######5   | 27.2M/41.5M [08:07<04:22, 57.1kB/s]
     66%|######5   | 27.2M/41.5M [08:07<05:34, 44.8kB/s]
     66%|######5   | 27.2M/41.5M [08:08<05:28, 45.6kB/s]
     66%|######5   | 27.2M/41.5M [08:08<05:23, 46.3kB/s]
     66%|######5   | 27.2M/41.5M [08:08<05:18, 46.9kB/s]
     66%|######5   | 27.2M/41.5M [08:08<05:15, 47.4kB/s]
     66%|######5   | 27.2M/41.5M [08:08<05:13, 47.7kB/s]
     66%|######5   | 27.2M/41.5M [08:08<04:02, 61.6kB/s]
     66%|######5   | 27.3M/41.5M [08:09<04:17, 57.9kB/s]
     66%|######5   | 27.3M/41.5M [08:09<04:30, 55.2kB/s]
     66%|######5   | 27.3M/41.5M [08:09<06:01, 41.2kB/s]
     66%|######5   | 27.3M/41.5M [08:09<03:42, 66.8kB/s]
     66%|######5   | 27.3M/41.5M [08:09<03:18, 74.7kB/s]
     66%|######5   | 27.3M/41.5M [08:10<03:03, 80.8kB/s]
     66%|######5   | 27.3M/41.5M [08:10<03:54, 63.3kB/s]
     66%|######5   | 27.4M/41.5M [08:10<04:51, 50.8kB/s]
 
     66%|######5   | 27.4M/41.5M [08:11<03:45, 65.6kB/s]
     66%|######6   | 27.4M/41.5M [08:11<03:57, 62.2kB/s]
     66%|######6   | 27.4M/41.5M [08:11<04:09, 59.1kB/s]
     66%|######6   | 27.4M/41.5M [08:11<04:21, 56.6kB/s]
     66%|######6   | 27.4M/41.5M [08:11<04:30, 54.5kB/s]
     66%|######6   | 27.4M/41.5M [08:12<04:20, 56.7kB/s]
     66%|######6   | 27.4M/41.5M [08:12<03:50, 64.0kB/s]
     66%|######6   | 27.4M/41.5M [08:12<04:06, 59.7kB/s]
     66%|######6   | 27.5M/41.5M [08:12<04:02, 60.8kB/s]
     66%|######6   | 27.5M/41.5M [08:12<03:39, 66.9kB/s]
     66%|######6   | 27.5M/41.5M [08:12<03:14, 75.7kB/s]
     66%|######6   | 27.5M/41.5M [08:13<04:40, 52.4kB/s]
     66%|######6   | 27.5M/41.5M [08:13<03:03, 80.0kB/s]
     66%|######6   | 27.5M/41.5M [08:13<03:04, 79.4kB/s]
     66%|######6   | 27.5M/41.5M [08:13<03:05, 78.6kB/s]
     66%|######6   | 27.6M/41.5M [08:13<02:54, 83.5kB/s]
     66%|######6   | 27.6M/41.5M [08:14<03:09, 77.0kB/s]
     67%|######6   | 27.6M/41
 .5M [08:14<03:20, 72.6kB/s]
     67%|######6   | 27.6M/41.5M [08:14<03:05, 78.7kB/s]
     67%|######6   | 27.6M/41.5M [08:14<03:07, 77.5kB/s]
     67%|######6   | 27.6M/41.5M [08:14<03:11, 75.9kB/s]
     67%|######6   | 27.6M/41.5M [08:15<03:16, 73.7kB/s]
     67%|######6   | 27.7M/41.5M [08:15<04:08, 58.4kB/s]
     67%|######6   | 27.7M/41.5M [08:15<04:12, 57.5kB/s]
     67%|######6   | 27.7M/41.5M [08:16<04:31, 53.3kB/s]
     67%|######6   | 27.7M/41.5M [08:16<05:21, 45.0kB/s]
     67%|######6   | 27.7M/41.5M [08:16<04:06, 58.6kB/s]
     67%|######6   | 27.7M/41.5M [08:17<04:47, 50.2kB/s]
     67%|######6   | 27.8M/41.5M [08:17<05:02, 47.6kB/s]
     67%|######6   | 27.8M/41.5M [08:17<06:58, 34.4kB/s]
     67%|######6   | 27.8M/41.5M [08:18<05:21, 44.7kB/s]
     67%|######6   | 27.8M/41.5M [08:18<05:16, 45.5kB/s]
     67%|######6   | 27.8M/41.5M [08:18<05:11, 46.2kB/s]
     67%|######6   | 27.8M/41.5M [08:18<06:19, 37.8kB/s]
     67%|######7   | 27.8M/41.5M [08:19<05:44, 41.6kB/s]
      67%|######7   | 27.8M/41.5M [08:19<06:40, 35.8kB/s]
     67%|######7   | 27.8M/41.5M [08:19<07:26, 32.1kB/s]
     67%|######7   | 27.8M/41.5M [08:20<06:46, 35.2kB/s]
     67%|######7   | 27.8M/41.5M [08:20<07:35, 31.4kB/s]
     67%|######7   | 27.9M/41.5M [08:20<09:21, 25.5kB/s]
     67%|######7   | 27.9M/41.5M [08:21<08:56, 26.7kB/s]
     67%|######7   | 27.9M/41.5M [08:21<10:35, 22.5kB/s]
     67%|######7   | 27.9M/41.5M [08:21<08:26, 28.2kB/s]
     67%|######7   | 27.9M/41.5M [08:22<07:33, 31.4kB/s]
     67%|######7   | 27.9M/41.5M [08:22<08:07, 29.2kB/s]
     67%|######7   | 27.9M/41.5M [08:22<07:14, 32.7kB/s]
     67%|######7   | 27.9M/41.5M [08:22<06:35, 36.0kB/s]
     67%|######7   | 27.9M/41.5M [08:23<06:05, 38.9kB/s]
     67%|######7   | 27.9M/41.5M [08:23<05:44, 41.3kB/s]
     67%|######7   | 27.9M/41.5M [08:23<05:28, 43.2kB/s]
     67%|######7   | 27.9M/41.5M [08:23<06:43, 35.2kB/s]
     67%|######7   | 28.0M/41.5M [08:24<06:58, 33.9kB/s]
     67%|######7   | 28.0M/4
 1.5M [08:24<05:24, 43.6kB/s]
     67%|######7   | 28.0M/41.5M [08:24<07:25, 31.8kB/s]
     67%|######7   | 28.0M/41.5M [08:25<05:29, 43.0kB/s]
     68%|######7   | 28.0M/41.5M [08:25<06:23, 36.9kB/s]
     68%|######7   | 28.0M/41.5M [08:25<06:19, 37.3kB/s]
     68%|######7   | 28.0M/41.5M [08:25<06:50, 34.4kB/s]
     68%|######7   | 28.0M/41.5M [08:26<06:38, 35.4kB/s]
     68%|######7   | 28.0M/41.5M [08:26<06:08, 38.2kB/s]
     68%|######7   | 28.0M/41.5M [08:26<05:46, 40.7kB/s]
     68%|######7   | 28.1M/41.5M [08:26<05:29, 42.7kB/s]
     68%|######7   | 28.1M/41.5M [08:26<05:18, 44.3kB/s]
     68%|######7   | 28.1M/41.5M [08:26<03:59, 58.8kB/s]
     68%|######7   | 28.1M/41.5M [08:27<04:11, 56.0kB/s]
     68%|######7   | 28.1M/41.5M [08:27<03:26, 67.8kB/s]
     68%|######7   | 28.1M/41.5M [08:27<03:45, 62.2kB/s]
     68%|######7   | 28.1M/41.5M [08:27<04:00, 58.3kB/s]
     68%|######7   | 28.1M/41.5M [08:27<05:27, 42.8kB/s]
     68%|######7   | 28.1M/41.5M [08:28<04:09, 56.2kB/s]
 
     68%|######7   | 28.1M/41.5M [08:28<04:17, 54.2kB/s]
     68%|######7   | 28.2M/41.5M [08:28<03:48, 61.1kB/s]
     68%|######7   | 28.2M/41.5M [08:28<04:01, 57.8kB/s]
     68%|######7   | 28.2M/41.5M [08:28<04:12, 55.3kB/s]
     68%|######7   | 28.2M/41.5M [08:28<03:59, 58.2kB/s]
     68%|######7   | 28.2M/41.5M [08:29<05:51, 39.7kB/s]
     68%|######7   | 28.2M/41.5M [08:29<04:03, 57.3kB/s]
     68%|######8   | 28.2M/41.5M [08:29<04:13, 55.0kB/s]
     68%|######8   | 28.2M/41.5M [08:30<06:45, 34.3kB/s]
     68%|######8   | 28.2M/41.5M [08:30<04:55, 47.1kB/s]
     68%|######8   | 28.2M/41.5M [08:30<05:58, 38.7kB/s]
     68%|######8   | 28.3M/41.5M [08:30<05:40, 40.8kB/s]
     68%|######8   | 28.3M/41.5M [08:31<06:40, 34.7kB/s]
     68%|######8   | 28.3M/41.5M [08:31<04:49, 47.8kB/s]
     68%|######8   | 28.3M/41.5M [08:31<05:54, 39.0kB/s]
     68%|######8   | 28.3M/41.5M [08:32<07:04, 32.6kB/s]
     68%|######8   | 28.3M/41.5M [08:32<05:03, 45.4kB/s]
     68%|######8   | 28.3M/
 41.5M [08:32<05:55, 38.8kB/s]
     68%|######8   | 28.3M/41.5M [08:33<05:29, 41.8kB/s]
     68%|######8   | 28.4M/41.5M [08:33<05:19, 43.1kB/s]
     68%|######8   | 28.4M/41.5M [08:33<05:11, 44.2kB/s]
     68%|######8   | 28.4M/41.5M [08:33<05:04, 45.2kB/s]
     68%|######8   | 28.4M/41.5M [08:33<04:58, 46.1kB/s]
     68%|######8   | 28.4M/41.5M [08:33<04:54, 46.7kB/s]
     68%|######8   | 28.4M/41.5M [08:34<04:50, 47.3kB/s]
     68%|######8   | 28.4M/41.5M [08:34<03:44, 61.2kB/s]
     68%|######8   | 28.4M/41.5M [08:34<03:57, 57.6kB/s]
     69%|######8   | 28.4M/41.5M [08:34<05:20, 42.7kB/s]
     69%|######8   | 28.4M/41.5M [08:34<03:21, 67.8kB/s]
     69%|######8   | 28.5M/41.5M [08:35<04:33, 50.0kB/s]
     69%|######8   | 28.5M/41.5M [08:35<03:44, 60.7kB/s]
     69%|######8   | 28.5M/41.5M [08:35<03:56, 57.8kB/s]
     69%|######8   | 28.5M/41.5M [08:35<04:06, 55.4kB/s]
     69%|######8   | 28.5M/41.5M [08:35<04:14, 53.6kB/s]
     69%|######8   | 28.5M/41.5M [08:36<04:20, 52.2kB/s
 ]
     69%|######8   | 28.5M/41.5M [08:36<03:29, 65.0kB/s]
     69%|######8   | 28.5M/41.5M [08:36<03:45, 60.3kB/s]
     69%|######8   | 28.5M/41.5M [08:36<03:58, 56.9kB/s]
     69%|######8   | 28.5M/41.5M [08:36<03:17, 68.8kB/s]
     69%|######8   | 28.6M/41.5M [08:36<03:35, 62.8kB/s]
     69%|######8   | 28.6M/41.5M [08:37<03:05, 73.1kB/s]
     69%|######8   | 28.6M/41.5M [08:37<03:25, 65.8kB/s]
     69%|######8   | 28.6M/41.5M [08:37<02:59, 75.2kB/s]
     69%|######8   | 28.6M/41.5M [08:37<02:45, 81.8kB/s]
     69%|######8   | 28.6M/41.5M [08:37<02:36, 86.5kB/s]
     69%|######9   | 28.6M/41.5M [08:37<02:30, 89.7kB/s]
     69%|######9   | 28.7M/41.5M [08:38<02:26, 92.0kB/s]
     69%|######9   | 28.7M/41.5M [08:38<02:23, 93.6kB/s]
     69%|######9   | 28.7M/41.5M [08:38<02:02, 109kB/s] 
     69%|######9   | 28.7M/41.5M [08:39<04:17, 51.9kB/s]
     69%|######9   | 28.8M/41.5M [08:39<01:41, 132kB/s] 
     69%|######9   | 28.8M/41.5M [08:39<01:42, 129kB/s]
     70%|######9   | 28.8M/
 41.5M [08:39<02:01, 109kB/s]
     70%|######9   | 28.9M/41.5M [08:40<01:49, 121kB/s]
     70%|######9   | 28.9M/41.5M [08:40<02:19, 95.0kB/s]
     70%|######9   | 28.9M/41.5M [08:40<02:26, 90.1kB/s]
     70%|######9   | 28.9M/41.5M [08:40<02:16, 96.6kB/s]
     70%|######9   | 28.9M/41.5M [08:41<02:16, 96.8kB/s]
     70%|######9   | 29.0M/41.5M [08:41<02:15, 96.9kB/s]
     70%|######9   | 29.0M/41.5M [08:41<02:15, 97.0kB/s]
     70%|######9   | 29.0M/41.5M [08:41<02:15, 97.1kB/s]
     70%|######9   | 29.0M/41.5M [08:41<02:14, 97.2kB/s]
     70%|######9   | 29.0M/41.5M [08:41<02:14, 97.2kB/s]
     70%|######9   | 29.0M/41.5M [08:42<02:53, 75.5kB/s]
     70%|#######   | 29.1M/41.5M [08:42<02:55, 74.5kB/s]
     70%|#######   | 29.1M/41.5M [08:42<02:33, 85.0kB/s]
     70%|#######   | 29.1M/41.5M [08:43<03:04, 70.4kB/s]
     70%|#######   | 29.1M/41.5M [08:43<03:01, 71.6kB/s]
     70%|#######   | 29.1M/41.5M [08:43<04:00, 54.0kB/s]
     70%|#######   | 29.1M/41.5M [08:44<05:48, 37.2kB/s]
      70%|#######   | 29.1M/41.5M [08:44<04:31, 47.7kB/s]
     70%|#######   | 29.1M/41.5M [08:44<04:30, 47.9kB/s]
     70%|#######   | 29.1M/41.5M [08:44<04:29, 48.1kB/s]
     70%|#######   | 29.2M/41.5M [08:44<04:28, 48.1kB/s]
     70%|#######   | 29.2M/41.5M [08:45<05:36, 38.4kB/s]
     70%|#######   | 29.2M/41.5M [08:45<05:05, 42.2kB/s]
     70%|#######   | 29.2M/41.5M [08:45<04:56, 43.5kB/s]
     70%|#######   | 29.2M/41.5M [08:45<04:48, 44.7kB/s]
     70%|#######   | 29.2M/41.5M [08:46<05:52, 36.6kB/s]
     70%|#######   | 29.2M/41.5M [08:46<04:17, 50.0kB/s]
     70%|#######   | 29.2M/41.5M [08:46<07:25, 28.9kB/s]
     70%|#######   | 29.2M/41.5M [08:47<05:19, 40.2kB/s]
     70%|#######   | 29.2M/41.5M [08:47<05:06, 41.9kB/s]
     71%|#######   | 29.3M/41.5M [08:47<05:59, 35.7kB/s]
     71%|#######   | 29.3M/41.5M [08:47<05:34, 38.3kB/s]
     71%|#######   | 29.3M/41.5M [08:47<05:15, 40.6kB/s]
     71%|#######   | 29.3M/41.5M [08:48<05:00, 42.5kB/s]
     71%|#######   | 29.3M/4
 1.5M [08:48<04:49, 44.1kB/s]
     71%|#######   | 29.3M/41.5M [08:48<03:38, 58.4kB/s]
     71%|#######   | 29.3M/41.5M [08:48<03:49, 55.7kB/s]
     71%|#######   | 29.3M/41.5M [08:48<03:21, 63.2kB/s]
     71%|#######   | 29.3M/41.5M [08:48<03:21, 63.4kB/s]
     71%|#######   | 29.4M/41.5M [08:49<03:05, 68.6kB/s]
     71%|#######   | 29.4M/41.5M [08:49<03:08, 67.5kB/s]
     71%|#######   | 29.4M/41.5M [08:49<02:46, 76.5kB/s]
     71%|#######   | 29.4M/41.5M [08:49<03:20, 63.2kB/s]
     71%|#######   | 29.4M/41.5M [08:49<02:41, 78.6kB/s]
     71%|#######   | 29.4M/41.5M [08:50<02:41, 78.4kB/s]
     71%|#######   | 29.4M/41.5M [08:50<02:30, 83.9kB/s]
     71%|#######   | 29.4M/41.5M [08:50<02:23, 87.8kB/s]
     71%|#######1  | 29.5M/41.5M [08:50<02:19, 90.6kB/s]
     71%|#######1  | 29.5M/41.5M [08:50<02:16, 92.6kB/s]
     71%|#######1  | 29.5M/41.5M [08:50<02:13, 94.0kB/s]
     71%|#######1  | 29.5M/41.5M [08:51<03:31, 59.5kB/s]
     71%|#######1  | 29.5M/41.5M [08:51<02:22, 88.0kB/s]
 
     71%|#######1  | 29.6M/41.5M [08:51<03:07, 66.6kB/s]
     71%|#######1  | 29.6M/41.5M [08:52<03:22, 61.7kB/s]
     71%|#######1  | 29.6M/41.5M [08:52<02:56, 70.7kB/s]
     71%|#######1  | 29.6M/41.5M [08:52<02:45, 75.4kB/s]
     71%|#######1  | 29.6M/41.5M [08:53<03:50, 54.0kB/s]
     71%|#######1  | 29.7M/41.5M [08:53<03:47, 54.5kB/s]
     72%|#######1  | 29.7M/41.5M [08:54<04:25, 46.6kB/s]
     72%|#######1  | 29.7M/41.5M [08:54<04:22, 47.2kB/s]
     72%|#######1  | 29.7M/41.5M [08:54<04:20, 47.4kB/s]
     72%|#######1  | 29.7M/41.5M [08:55<04:19, 47.6kB/s]
     72%|#######1  | 29.7M/41.5M [08:55<04:18, 47.8kB/s]
     72%|#######1  | 29.7M/41.5M [08:55<04:16, 48.0kB/s]
     72%|#######1  | 29.7M/41.5M [08:55<04:15, 48.2kB/s]
     72%|#######1  | 29.7M/41.5M [08:55<04:15, 48.3kB/s]
     72%|#######1  | 29.8M/41.5M [08:55<04:14, 48.4kB/s]
     72%|#######1  | 29.8M/41.5M [08:56<03:18, 62.0kB/s]
     72%|#######1  | 29.8M/41.5M [08:56<03:31, 58.2kB/s]
     72%|#######1  | 29.8M/
 41.5M [08:56<02:56, 69.5kB/s]
     72%|#######1  | 29.8M/41.5M [08:56<03:13, 63.4kB/s]
     72%|#######1  | 29.8M/41.5M [08:56<02:46, 73.4kB/s]
     72%|#######1  | 29.8M/41.5M [08:56<03:05, 66.1kB/s]
     72%|#######1  | 29.8M/41.5M [08:57<02:42, 75.4kB/s]
     72%|#######1  | 29.9M/41.5M [08:57<02:28, 81.9kB/s]
     72%|#######1  | 29.9M/41.5M [08:57<03:14, 62.7kB/s]
     72%|#######2  | 29.9M/41.5M [08:57<02:40, 75.6kB/s]
     72%|#######2  | 29.9M/41.5M [08:57<02:08, 94.2kB/s]
     72%|#######2  | 29.9M/41.5M [08:58<02:16, 88.6kB/s]
     72%|#######2  | 29.9M/41.5M [08:58<02:03, 97.9kB/s]
     72%|#######2  | 30.0M/41.5M [08:58<02:39, 75.7kB/s]
     72%|#######2  | 30.0M/41.5M [08:58<02:18, 87.1kB/s]
     72%|#######2  | 30.0M/41.5M [08:59<02:14, 89.7kB/s]
     72%|#######2  | 30.0M/41.5M [08:59<02:54, 69.1kB/s]
     72%|#######2  | 30.0M/41.5M [08:59<02:19, 85.9kB/s]
     72%|#######2  | 30.0M/41.5M [08:59<02:47, 71.6kB/s]
     72%|#######2  | 30.1M/41.5M [09:00<03:42, 53.8kB/s
 ]
     72%|#######2  | 30.1M/41.5M [09:00<03:36, 55.3kB/s]
     72%|#######2  | 30.1M/41.5M [09:01<05:26, 36.7kB/s]
     73%|#######2  | 30.1M/41.5M [09:01<04:14, 46.9kB/s]
     73%|#######2  | 30.1M/41.5M [09:01<05:01, 39.6kB/s]
     73%|#######2  | 30.1M/41.5M [09:01<04:49, 41.2kB/s]
     73%|#######2  | 30.1M/41.5M [09:02<05:37, 35.3kB/s]
     73%|#######2  | 30.1M/41.5M [09:02<05:13, 38.0kB/s]
     73%|#######2  | 30.1M/41.5M [09:02<04:55, 40.4kB/s]
     73%|#######2  | 30.1M/41.5M [09:02<04:40, 42.4kB/s]
     73%|#######2  | 30.1M/41.5M [09:02<04:30, 44.0kB/s]
     73%|#######2  | 30.2M/41.5M [09:02<04:22, 45.2kB/s]
     73%|#######2  | 30.2M/41.5M [09:03<03:18, 59.6kB/s]
     73%|#######2  | 30.2M/41.5M [09:03<03:29, 56.5kB/s]
     73%|#######2  | 30.2M/41.5M [09:03<03:38, 54.3kB/s]
     73%|#######2  | 30.2M/41.5M [09:03<02:57, 66.8kB/s]
     73%|#######2  | 30.2M/41.5M [09:03<03:12, 61.4kB/s]
     73%|#######2  | 30.2M/41.5M [09:03<02:43, 72.1kB/s]
     73%|#######2  | 30.2M
 /41.5M [09:04<02:28, 79.5kB/s]
     73%|#######2  | 30.2M/41.5M [09:04<03:37, 54.2kB/s]
     73%|#######2  | 30.3M/41.5M [09:04<03:11, 61.5kB/s]
     73%|#######3  | 30.3M/41.5M [09:05<02:34, 75.8kB/s]
     73%|#######3  | 30.3M/41.5M [09:05<02:38, 73.9kB/s]
     73%|#######3  | 30.3M/41.5M [09:05<02:53, 67.4kB/s]
     73%|#######3  | 30.3M/41.5M [09:05<02:45, 70.6kB/s]
     73%|#######3  | 30.3M/41.5M [09:05<03:00, 64.8kB/s]
     73%|#######3  | 30.3M/41.5M [09:05<03:00, 64.9kB/s]
     73%|#######3  | 30.4M/41.5M [09:06<02:48, 69.3kB/s]
     73%|#######3  | 30.4M/41.5M [09:06<03:56, 49.4kB/s]
     73%|#######3  | 30.4M/41.5M [09:06<02:40, 72.5kB/s]
     73%|#######3  | 30.4M/41.5M [09:06<03:05, 62.7kB/s]
     73%|#######3  | 30.4M/41.5M [09:07<03:03, 63.2kB/s]
     73%|#######3  | 30.4M/41.5M [09:07<03:15, 59.5kB/s]
     73%|#######3  | 30.4M/41.5M [09:07<03:24, 56.6kB/s]
     73%|#######3  | 30.4M/41.5M [09:07<03:32, 54.4kB/s]
     73%|#######3  | 30.4M/41.5M [09:07<03:15, 59.2kB/
 s]
     73%|#######3  | 30.5M/41.5M [09:07<03:02, 63.5kB/s]
     73%|#######3  | 30.5M/41.5M [09:08<03:14, 59.3kB/s]
     73%|#######3  | 30.5M/41.5M [09:08<04:25, 43.5kB/s]
     74%|#######3  | 30.5M/41.5M [09:08<02:48, 68.4kB/s]
     74%|#######3  | 30.5M/41.5M [09:08<02:43, 70.2kB/s]
     74%|#######3  | 30.5M/41.5M [09:09<04:25, 43.4kB/s]
     74%|#######3  | 30.5M/41.5M [09:09<02:44, 69.7kB/s]
     74%|#######3  | 30.6M/41.5M [09:09<03:07, 61.3kB/s]
     74%|#######3  | 30.6M/41.5M [09:09<03:07, 61.0kB/s]
     74%|#######3  | 30.6M/41.5M [09:10<02:59, 63.6kB/s]
     74%|#######3  | 30.6M/41.5M [09:10<02:53, 65.9kB/s]
     74%|#######3  | 30.6M/41.5M [09:10<04:12, 45.3kB/s]
     74%|#######3  | 30.6M/41.5M [09:10<02:36, 72.6kB/s]
     74%|#######3  | 30.6M/41.5M [09:10<02:37, 72.0kB/s]
     74%|#######3  | 30.7M/41.5M [09:11<02:47, 67.8kB/s]
     74%|#######3  | 30.7M/41.5M [09:11<02:31, 74.7kB/s]
     74%|#######3  | 30.7M/41.5M [09:11<02:21, 80.3kB/s]
     74%|#######4  | 30.7
 M/41.5M [09:11<02:26, 76.9kB/s]
     74%|#######4  | 30.7M/41.5M [09:12<03:14, 58.2kB/s]
     74%|#######4  | 30.7M/41.5M [09:12<03:03, 61.4kB/s]
     74%|#######4  | 30.8M/41.5M [09:12<02:21, 79.5kB/s]
     74%|#######4  | 30.8M/41.5M [09:12<02:34, 73.0kB/s]
     74%|#######4  | 30.8M/41.5M [09:13<02:23, 78.4kB/s]
     74%|#######4  | 30.8M/41.5M [09:13<02:27, 76.1kB/s]
     74%|#######4  | 30.8M/41.5M [09:13<02:51, 65.4kB/s]
     74%|#######4  | 30.8M/41.5M [09:13<02:22, 78.5kB/s]
     74%|#######4  | 30.9M/41.5M [09:13<02:13, 83.3kB/s]
     74%|#######4  | 30.9M/41.5M [09:14<02:20, 79.2kB/s]
     74%|#######4  | 30.9M/41.5M [09:14<02:12, 83.8kB/s]
     74%|#######4  | 30.9M/41.5M [09:14<02:07, 87.4kB/s]
     75%|#######4  | 30.9M/41.5M [09:14<02:24, 76.8kB/s]
     75%|#######4  | 30.9M/41.5M [09:14<02:15, 81.9kB/s]
     75%|#######4  | 30.9M/41.5M [09:15<02:42, 67.9kB/s]
     75%|#######4  | 31.0M/41.5M [09:15<02:08, 85.9kB/s]
     75%|#######4  | 31.0M/41.5M [09:15<02:34, 71.2kB
 /s]
     75%|#######4  | 31.0M/41.5M [09:16<02:54, 62.9kB/s]
     75%|#######4  | 31.0M/41.5M [09:16<02:25, 75.6kB/s]
     75%|#######4  | 31.0M/41.5M [09:16<02:35, 70.6kB/s]
     75%|#######4  | 31.0M/41.5M [09:16<02:35, 70.3kB/s]
     75%|#######4  | 31.1M/41.5M [09:16<02:32, 71.9kB/s]
     75%|#######4  | 31.1M/41.5M [09:16<02:46, 65.7kB/s]
     75%|#######4  | 31.1M/41.5M [09:17<02:42, 67.1kB/s]
     75%|#######4  | 31.1M/41.5M [09:17<02:36, 69.5kB/s]
     75%|#######4  | 31.1M/41.5M [09:17<02:20, 77.7kB/s]
     75%|#######4  | 31.1M/41.5M [09:17<03:36, 50.3kB/s]
     75%|#######5  | 31.1M/41.5M [09:18<02:13, 81.5kB/s]
     75%|#######5  | 31.2M/41.5M [09:18<02:06, 85.4kB/s]
     75%|#######5  | 31.2M/41.5M [09:18<02:02, 88.5kB/s]
     75%|#######5  | 31.2M/41.5M [09:18<01:58, 90.9kB/s]
     75%|#######5  | 31.2M/41.5M [09:18<01:56, 92.7kB/s]
     75%|#######5  | 31.2M/41.5M [09:18<02:14, 80.2kB/s]
     75%|#######5  | 31.2M/41.5M [09:19<02:07, 84.6kB/s]
     75%|#######5  | 31.
 2M/41.5M [09:20<04:29, 39.9kB/s]
     75%|#######5  | 31.3M/41.5M [09:20<02:50, 62.7kB/s]
     75%|#######5  | 31.3M/41.5M [09:20<02:20, 76.0kB/s]
     76%|#######5  | 31.3M/41.5M [09:21<03:46, 47.1kB/s]
     76%|#######5  | 31.3M/41.5M [09:21<03:56, 44.9kB/s]
     76%|#######5  | 31.4M/41.5M [09:22<04:26, 39.9kB/s]
     76%|#######5  | 31.4M/41.5M [09:22<04:09, 42.5kB/s]
     76%|#######5  | 31.4M/41.5M [09:22<04:56, 35.8kB/s]
     76%|#######5  | 31.4M/41.5M [09:23<06:26, 27.5kB/s]
     76%|#######5  | 31.4M/41.5M [09:23<06:26, 27.4kB/s]
     76%|#######5  | 31.4M/41.5M [09:23<05:55, 29.8kB/s]
     76%|#######5  | 31.4M/41.5M [09:23<06:03, 29.1kB/s]
     76%|#######5  | 31.4M/41.5M [09:24<05:36, 31.5kB/s]
     76%|#######5  | 31.4M/41.5M [09:24<05:50, 30.2kB/s]
     76%|#######5  | 31.4M/41.5M [09:24<06:27, 27.2kB/s]
     76%|#######5  | 31.4M/41.5M [09:25<05:10, 34.0kB/s]
     76%|#######5  | 31.4M/41.5M [09:25<04:47, 36.7kB/s]
     76%|#######5  | 31.5M/41.5M [09:25<04:28, 39.2k
 B/s]
     76%|#######5  | 31.5M/41.5M [09:25<04:14, 41.3kB/s]
     76%|#######5  | 31.5M/41.5M [09:25<04:03, 43.2kB/s]
     76%|#######5  | 31.5M/41.5M [09:26<03:50, 45.4kB/s]
     76%|#######5  | 31.5M/41.5M [09:26<04:38, 37.6kB/s]
     76%|#######5  | 31.5M/41.5M [09:26<03:28, 50.1kB/s]
     76%|#######5  | 31.5M/41.5M [09:26<03:30, 49.8kB/s]
     76%|#######5  | 31.5M/41.5M [09:26<03:31, 49.5kB/s]
     76%|#######6  | 31.5M/41.5M [09:27<02:48, 62.0kB/s]
     76%|#######6  | 31.5M/41.5M [09:27<03:47, 45.9kB/s]
     76%|#######6  | 31.6M/41.5M [09:27<03:43, 46.5kB/s]
     76%|#######6  | 31.6M/41.5M [09:27<02:55, 59.2kB/s]
     76%|#######6  | 31.6M/41.5M [09:27<03:04, 56.5kB/s]
     76%|#######6  | 31.6M/41.5M [09:28<04:03, 42.7kB/s]
     76%|#######6  | 31.6M/41.5M [09:28<03:06, 55.5kB/s]
     76%|#######6  | 31.6M/41.5M [09:28<03:12, 53.8kB/s]
     76%|#######6  | 31.6M/41.5M [09:28<03:17, 52.4kB/s]
     76%|#######6  | 31.6M/41.5M [09:29<03:21, 51.4kB/s]
     76%|#######6  | 31
 .6M/41.5M [09:29<02:40, 64.3kB/s]
     76%|#######6  | 31.6M/41.5M [09:29<02:52, 59.8kB/s]
     76%|#######6  | 31.7M/41.5M [09:29<03:02, 56.6kB/s]
     76%|#######6  | 31.7M/41.5M [09:29<04:05, 42.0kB/s]
     76%|#######6  | 31.7M/41.5M [09:30<02:32, 67.3kB/s]
     76%|#######6  | 31.7M/41.5M [09:30<03:26, 49.6kB/s]
     76%|#######6  | 31.7M/41.5M [09:30<02:49, 60.4kB/s]
     76%|#######6  | 31.7M/41.5M [09:30<02:58, 57.5kB/s]
     76%|#######6  | 31.7M/41.5M [09:30<03:05, 55.2kB/s]
     76%|#######6  | 31.7M/41.5M [09:31<03:11, 53.4kB/s]
     77%|#######6  | 31.8M/41.5M [09:31<02:35, 65.7kB/s]
     77%|#######6  | 31.8M/41.5M [09:31<02:47, 60.8kB/s]
     77%|#######6  | 31.8M/41.5M [09:31<02:57, 57.3kB/s]
     77%|#######6  | 31.8M/41.5M [09:31<02:27, 69.0kB/s]
     77%|#######6  | 31.8M/41.5M [09:31<02:41, 63.0kB/s]
     77%|#######6  | 31.8M/41.5M [09:32<02:18, 73.2kB/s]
     77%|#######6  | 31.8M/41.5M [09:32<02:06, 80.3kB/s]
     77%|#######6  | 31.8M/41.5M [09:32<02:22, 70.9
 kB/s]
     77%|#######6  | 31.8M/41.5M [09:32<02:46, 60.7kB/s]
     77%|#######6  | 31.9M/41.5M [09:32<02:03, 81.6kB/s]
     77%|#######6  | 31.9M/41.5M [09:33<01:57, 85.7kB/s]
     77%|#######6  | 31.9M/41.5M [09:33<01:53, 88.9kB/s]
     77%|#######6  | 31.9M/41.5M [09:33<02:20, 71.3kB/s]
     77%|#######6  | 31.9M/41.5M [09:33<02:17, 73.0kB/s]
     77%|#######6  | 31.9M/41.5M [09:33<02:19, 71.6kB/s]
     77%|#######6  | 31.9M/41.5M [09:34<03:14, 51.5kB/s]
     77%|#######7  | 32.0M/41.5M [09:34<04:42, 35.4kB/s]
     77%|#######7  | 32.0M/41.5M [09:35<04:15, 39.0kB/s]
     77%|#######7  | 32.0M/41.5M [09:35<04:45, 34.9kB/s]
     77%|#######7  | 32.0M/41.5M [09:35<03:39, 45.3kB/s]
     77%|#######7  | 32.0M/41.5M [09:35<03:36, 45.9kB/s]
     77%|#######7  | 32.0M/41.5M [09:36<03:33, 46.5kB/s]
     77%|#######7  | 32.0M/41.5M [09:36<03:31, 47.0kB/s]
     77%|#######7  | 32.0M/41.5M [09:36<03:29, 47.4kB/s]
     77%|#######7  | 32.0M/41.5M [09:36<03:27, 47.8kB/s]
     77%|#######7  | 3
 2.0M/41.5M [09:36<03:26, 48.0kB/s]
     77%|#######7  | 32.1M/41.5M [09:37<04:23, 37.6kB/s]
     77%|#######7  | 32.1M/41.5M [09:37<03:55, 41.9kB/s]
     77%|#######7  | 32.1M/41.5M [09:37<03:03, 53.8kB/s]
     77%|#######7  | 32.1M/41.5M [09:37<03:07, 52.6kB/s]
     77%|#######7  | 32.1M/41.5M [09:38<03:58, 41.3kB/s]
     77%|#######7  | 32.1M/41.5M [09:38<03:49, 42.9kB/s]
     77%|#######7  | 32.1M/41.5M [09:38<03:41, 44.3kB/s]
     77%|#######7  | 32.1M/41.5M [09:38<03:36, 45.4kB/s]
     77%|#######7  | 32.1M/41.5M [09:38<03:31, 46.3kB/s]
     77%|#######7  | 32.1M/41.5M [09:38<02:42, 60.4kB/s]
     78%|#######7  | 32.2M/41.5M [09:39<02:51, 57.1kB/s]
     78%|#######7  | 32.2M/41.5M [09:39<02:58, 54.7kB/s]
     78%|#######7  | 32.2M/41.5M [09:39<02:25, 67.1kB/s]
     78%|#######7  | 32.2M/41.5M [09:39<02:38, 61.6kB/s]
     78%|#######7  | 32.2M/41.5M [09:39<02:14, 72.2kB/s]
     78%|#######7  | 32.2M/41.5M [09:39<02:02, 79.6kB/s]
     78%|#######7  | 32.2M/41.5M [09:40<02:17, 70.
 4kB/s]
     78%|#######7  | 32.2M/41.5M [09:40<02:03, 78.4kB/s]
     78%|#######7  | 32.3M/41.5M [09:40<02:04, 78.0kB/s]
     78%|#######7  | 32.3M/41.5M [09:40<01:32, 105kB/s] 
     78%|#######7  | 32.3M/41.5M [09:40<01:41, 95.2kB/s]
     78%|#######7  | 32.3M/41.5M [09:41<01:33, 103kB/s] 
     78%|#######7  | 32.3M/41.5M [09:41<01:41, 94.2kB/s]
     78%|#######7  | 32.3M/41.5M [09:41<01:40, 95.1kB/s]
     78%|#######7  | 32.4M/41.5M [09:41<02:09, 74.1kB/s]
     78%|#######8  | 32.4M/41.5M [09:41<01:52, 84.7kB/s]
     78%|#######8  | 32.4M/41.5M [09:42<02:02, 77.8kB/s]
     78%|#######8  | 32.4M/41.5M [09:42<02:39, 59.8kB/s]
     78%|#######8  | 32.4M/41.5M [09:42<02:33, 61.7kB/s]
     78%|#######8  | 32.4M/41.5M [09:43<02:38, 59.9kB/s]
     78%|#######8  | 32.5M/41.5M [09:43<02:44, 57.7kB/s]
     78%|#######8  | 32.5M/41.5M [09:43<02:22, 66.3kB/s]
     78%|#######8  | 32.5M/41.5M [09:43<02:32, 62.1kB/s]
     78%|#######8  | 32.5M/41.5M [09:43<02:12, 70.9kB/s]
     78%|#######8  | 
 32.5M/41.5M [09:44<02:24, 65.1kB/s]
     78%|#######8  | 32.5M/41.5M [09:44<02:35, 60.6kB/s]
     78%|#######8  | 32.5M/41.5M [09:44<02:49, 55.3kB/s]
     78%|#######8  | 32.5M/41.5M [09:44<02:54, 53.7kB/s]
     78%|#######8  | 32.5M/41.5M [09:44<02:24, 64.8kB/s]
     78%|#######8  | 32.6M/41.5M [09:45<03:16, 47.8kB/s]
     79%|#######8  | 32.6M/41.5M [09:45<02:47, 55.9kB/s]
     79%|#######8  | 32.6M/41.5M [09:45<02:52, 54.2kB/s]
     79%|#######8  | 32.6M/41.5M [09:45<02:56, 52.8kB/s]
     79%|#######8  | 32.6M/41.5M [09:45<03:00, 51.7kB/s]
     79%|#######8  | 32.6M/41.5M [09:46<03:03, 50.9kB/s]
     79%|#######8  | 32.6M/41.5M [09:46<03:44, 41.4kB/s]
     79%|#######8  | 32.6M/41.5M [09:46<03:48, 40.7kB/s]
     79%|#######8  | 32.6M/41.5M [09:47<05:13, 29.7kB/s]
     79%|#######8  | 32.6M/41.5M [09:47<04:51, 31.8kB/s]
     79%|#######8  | 32.6M/41.5M [09:47<05:17, 29.2kB/s]
     79%|#######8  | 32.6M/41.5M [09:47<05:36, 27.6kB/s]
     79%|#######8  | 32.7M/41.5M [09:48<05:49, 26
 .5kB/s]
     79%|#######8  | 32.7M/41.5M [09:48<03:52, 39.8kB/s]
     79%|#######8  | 32.7M/41.5M [09:48<03:41, 41.7kB/s]
     79%|#######8  | 32.7M/41.5M [09:48<03:33, 43.3kB/s]
     79%|#######8  | 32.7M/41.5M [09:49<03:39, 41.9kB/s]
     79%|#######8  | 32.7M/41.5M [09:49<03:31, 43.6kB/s]
     79%|#######8  | 32.7M/41.5M [09:49<03:24, 44.9kB/s]
     79%|#######8  | 32.7M/41.5M [09:49<03:20, 46.0kB/s]
     79%|#######8  | 32.7M/41.5M [09:49<03:02, 50.4kB/s]
     79%|#######8  | 32.7M/41.5M [09:49<02:33, 59.9kB/s]
     79%|#######8  | 32.8M/41.5M [09:50<02:41, 56.7kB/s]
     79%|#######8  | 32.8M/41.5M [09:50<02:35, 58.8kB/s]
     79%|#######8  | 32.8M/41.5M [09:50<02:57, 51.5kB/s]
     79%|#######9  | 32.8M/41.5M [09:50<02:10, 70.1kB/s]
     79%|#######9  | 32.8M/41.5M [09:50<02:05, 72.7kB/s]
     79%|#######9  | 32.8M/41.5M [09:50<02:18, 65.7kB/s]
     79%|#######9  | 32.8M/41.5M [09:51<02:01, 74.9kB/s]
     79%|#######9  | 32.8M/41.5M [09:51<02:55, 51.9kB/s]
     79%|#######9  |
  32.9M/41.5M [09:51<02:08, 70.7kB/s]
     79%|#######9  | 32.9M/41.5M [09:51<02:46, 54.2kB/s]
     79%|#######9  | 32.9M/41.5M [09:52<02:26, 61.5kB/s]
     79%|#######9  | 32.9M/41.5M [09:52<02:08, 69.9kB/s]
     79%|#######9  | 32.9M/41.5M [09:52<02:19, 64.6kB/s]
     79%|#######9  | 32.9M/41.5M [09:52<03:08, 47.7kB/s]
     79%|#######9  | 32.9M/41.5M [09:52<02:32, 59.1kB/s]
     79%|#######9  | 32.9M/41.5M [09:53<02:38, 56.5kB/s]
     79%|#######9  | 32.9M/41.5M [09:53<02:44, 54.5kB/s]
     79%|#######9  | 32.9M/41.5M [09:53<02:49, 52.9kB/s]
     79%|#######9  | 33.0M/41.5M [09:53<02:16, 65.3kB/s]
     79%|#######9  | 33.0M/41.5M [09:53<02:27, 60.6kB/s]
     79%|#######9  | 33.0M/41.5M [09:53<02:36, 57.1kB/s]
     80%|#######9  | 33.0M/41.5M [09:54<03:31, 42.2kB/s]
     80%|#######9  | 33.0M/41.5M [09:54<02:11, 67.6kB/s]
     80%|#######9  | 33.0M/41.5M [09:54<02:21, 62.7kB/s]
     80%|#######9  | 33.0M/41.5M [09:54<02:30, 58.9kB/s]
     80%|#######9  | 33.0M/41.5M [09:54<02:38, 5
 6.1kB/s]
     80%|#######9  | 33.0M/41.5M [09:55<02:47, 52.8kB/s]
     80%|#######9  | 33.1M/41.5M [09:55<02:19, 63.5kB/s]
     80%|#######9  | 33.1M/41.5M [09:55<02:27, 59.7kB/s]
     80%|#######9  | 33.1M/41.5M [09:55<02:35, 56.7kB/s]
     80%|#######9  | 33.1M/41.5M [09:56<02:45, 53.2kB/s]
     80%|#######9  | 33.1M/41.5M [09:56<02:18, 63.6kB/s]
     80%|#######9  | 33.1M/41.5M [09:56<02:26, 59.8kB/s]
     80%|#######9  | 33.1M/41.5M [09:56<02:34, 56.9kB/s]
     80%|#######9  | 33.1M/41.5M [09:56<02:40, 54.6kB/s]
     80%|#######9  | 33.1M/41.5M [09:57<02:20, 62.1kB/s]
     80%|#######9  | 33.2M/41.5M [09:57<02:29, 58.4kB/s]
     80%|#######9  | 33.2M/41.5M [09:57<02:25, 60.0kB/s]
     80%|#######9  | 33.2M/41.5M [09:57<02:34, 56.6kB/s]
     80%|#######9  | 33.2M/41.5M [09:57<02:40, 54.2kB/s]
     80%|########  | 33.2M/41.5M [09:57<02:09, 67.2kB/s]
     80%|########  | 33.2M/41.5M [09:58<03:03, 47.4kB/s]
     80%|########  | 33.2M/41.5M [09:58<02:08, 67.4kB/s]
     80%|########  
 | 33.2M/41.5M [09:58<02:17, 62.8kB/s]
     80%|########  | 33.2M/41.5M [09:58<02:16, 63.4kB/s]
     80%|########  | 33.3M/41.5M [09:59<02:40, 53.7kB/s]
     80%|########  | 33.3M/41.5M [09:59<02:07, 67.8kB/s]
     80%|########  | 33.3M/41.5M [09:59<02:17, 62.7kB/s]
     80%|########  | 33.3M/41.5M [09:59<02:26, 58.9kB/s]
     80%|########  | 33.3M/41.5M [09:59<02:33, 56.0kB/s]
     80%|########  | 33.3M/41.5M [09:59<02:06, 67.8kB/s]
     80%|########  | 33.3M/41.5M [10:00<02:24, 59.2kB/s]
     80%|########  | 33.3M/41.5M [10:00<02:27, 58.1kB/s]
     80%|########  | 33.4M/41.5M [10:00<02:32, 55.9kB/s]
     80%|########  | 33.4M/41.5M [10:00<02:47, 50.9kB/s]
     80%|########  | 33.4M/41.5M [10:01<02:16, 62.4kB/s]
     80%|########  | 33.4M/41.5M [10:01<03:30, 40.4kB/s]
     80%|########  | 33.4M/41.5M [10:01<02:51, 49.4kB/s]
     81%|########  | 33.4M/41.5M [10:01<03:01, 46.8kB/s]
     81%|########  | 33.4M/41.5M [10:02<02:49, 49.9kB/s]
     81%|########  | 33.4M/41.5M [10:02<02:50, 
 49.6kB/s]
     81%|########  | 33.4M/41.5M [10:02<02:51, 49.3kB/s]
     81%|########  | 33.4M/41.5M [10:02<02:51, 49.2kB/s]
     81%|########  | 33.4M/41.5M [10:02<03:52, 36.3kB/s]
     81%|########  | 33.5M/41.5M [10:03<02:47, 50.4kB/s]
     81%|########  | 33.5M/41.5M [10:03<02:48, 49.9kB/s]
     81%|########  | 33.5M/41.5M [10:03<03:46, 37.0kB/s]
     81%|########  | 33.5M/41.5M [10:03<03:32, 39.6kB/s]
     81%|########  | 33.5M/41.5M [10:03<02:37, 53.3kB/s]
     81%|########  | 33.5M/41.5M [10:04<02:40, 52.1kB/s]
     81%|########  | 33.5M/41.5M [10:04<02:43, 51.2kB/s]
     81%|########  | 33.5M/41.5M [10:04<02:32, 54.8kB/s]
     81%|########  | 33.5M/41.5M [10:04<03:24, 40.7kB/s]
     81%|########  | 33.5M/41.5M [10:04<02:32, 54.6kB/s]
     81%|########  | 33.6M/41.5M [10:05<02:36, 53.1kB/s]
     81%|########  | 33.6M/41.5M [10:05<02:40, 51.9kB/s]
     81%|########  | 33.6M/41.5M [10:05<02:18, 60.0kB/s]
     81%|########  | 33.6M/41.5M [10:05<02:25, 56.9kB/s]
     81%|######## 
  | 33.6M/41.5M [10:05<02:35, 53.3kB/s]
     81%|########1 | 33.6M/41.5M [10:06<02:40, 51.5kB/s]
     81%|########1 | 33.6M/41.5M [10:06<02:43, 50.5kB/s]
     81%|########1 | 33.6M/41.5M [10:06<02:54, 47.2kB/s]
     81%|########1 | 33.6M/41.5M [10:07<03:18, 41.4kB/s]
     81%|########1 | 33.7M/41.5M [10:07<03:23, 40.3kB/s]
     81%|########1 | 33.7M/41.5M [10:07<03:14, 42.1kB/s]
     81%|########1 | 33.7M/41.5M [10:07<03:07, 43.7kB/s]
     81%|########1 | 33.7M/41.5M [10:08<03:48, 35.8kB/s]
     81%|########1 | 33.7M/41.5M [10:08<03:31, 38.7kB/s]
     81%|########1 | 33.7M/41.5M [10:08<04:06, 33.1kB/s]
     81%|########1 | 33.7M/41.5M [10:08<03:02, 44.8kB/s]
     81%|########1 | 33.7M/41.5M [10:08<02:58, 45.7kB/s]
     81%|########1 | 33.7M/41.5M [10:09<02:55, 46.4kB/s]
     81%|########1 | 33.7M/41.5M [10:09<02:53, 47.0kB/s]
     81%|########1 | 33.7M/41.5M [10:09<03:37, 37.3kB/s]
     81%|########1 | 33.8M/41.5M [10:09<03:23, 40.0kB/s]
     81%|########1 | 33.8M/41.5M [10:10<04:00,
  33.7kB/s]
     81%|########1 | 33.8M/41.5M [10:10<04:27, 30.3kB/s]
     81%|########1 | 33.8M/41.5M [10:10<03:03, 44.1kB/s]
     81%|########1 | 33.8M/41.5M [10:10<03:39, 36.8kB/s]
     81%|########1 | 33.8M/41.5M [10:11<03:25, 39.3kB/s]
     81%|########1 | 33.8M/41.5M [10:11<03:14, 41.4kB/s]
     81%|########1 | 33.8M/41.5M [10:11<03:06, 43.2kB/s]
     82%|########1 | 33.8M/41.5M [10:11<03:00, 44.6kB/s]
     82%|########1 | 33.8M/41.5M [10:11<02:55, 45.7kB/s]
     82%|########1 | 33.8M/41.5M [10:12<02:23, 55.8kB/s]
     82%|########1 | 33.9M/41.5M [10:12<02:28, 53.9kB/s]
     82%|########1 | 33.9M/41.5M [10:12<02:20, 56.8kB/s]
     82%|########1 | 33.9M/41.5M [10:12<02:05, 63.7kB/s]
     82%|########1 | 33.9M/41.5M [10:12<02:03, 64.5kB/s]
     82%|########1 | 33.9M/41.5M [10:12<01:46, 74.5kB/s]
     82%|########1 | 33.9M/41.5M [10:12<01:37, 81.4kB/s]
     82%|########1 | 33.9M/41.5M [10:13<01:50, 71.5kB/s]
     82%|########1 | 33.9M/41.5M [10:13<01:39, 79.3kB/s]
     82%|########
 1 | 34.0M/41.5M [10:13<01:33, 84.8kB/s]
     82%|########1 | 34.0M/41.5M [10:13<01:29, 88.5kB/s]
     82%|########1 | 34.0M/41.5M [10:13<01:26, 91.2kB/s]
     82%|########1 | 34.0M/41.5M [10:13<01:12, 108kB/s] 
     82%|########2 | 34.0M/41.5M [10:14<01:14, 105kB/s]
     82%|########2 | 34.0M/41.5M [10:14<01:06, 117kB/s]
     82%|########2 | 34.1M/41.5M [10:14<01:10, 111kB/s]
     82%|########2 | 34.1M/41.5M [10:14<01:18, 98.7kB/s]
     82%|########2 | 34.1M/41.5M [10:14<01:03, 121kB/s] 
     82%|########2 | 34.1M/41.5M [10:15<01:07, 114kB/s]
     82%|########2 | 34.1M/41.5M [10:15<01:10, 109kB/s]
     82%|########2 | 34.1M/41.5M [10:15<01:13, 105kB/s]
     82%|########2 | 34.2M/41.5M [10:15<01:05, 118kB/s]
     82%|########2 | 34.2M/41.5M [10:15<01:00, 126kB/s]
     82%|########2 | 34.2M/41.5M [10:15<01:04, 117kB/s]
     83%|########2 | 34.2M/41.5M [10:16<01:00, 126kB/s]
     83%|########2 | 34.3M/41.5M [10:16<00:57, 132kB/s]
     83%|########2 | 34.3M/41.5M [10:16<01:20, 93.5kB/s]
 
     83%|########2 | 34.3M/41.5M [10:16<01:20, 94.3kB/s]
     83%|########2 | 34.3M/41.5M [10:16<01:19, 94.9kB/s]
     83%|########2 | 34.3M/41.5M [10:17<01:31, 81.8kB/s]
     83%|########2 | 34.4M/41.5M [10:17<01:23, 89.9kB/s]
     83%|########2 | 34.4M/41.5M [10:17<01:41, 73.8kB/s]
     83%|########2 | 34.4M/41.5M [10:18<01:55, 64.7kB/s]
     83%|########2 | 34.4M/41.5M [10:18<02:01, 61.4kB/s]
     83%|########2 | 34.4M/41.5M [10:18<01:46, 69.5kB/s]
     83%|########2 | 34.4M/41.5M [10:18<01:55, 64.4kB/s]
     83%|########2 | 34.4M/41.5M [10:18<01:41, 72.9kB/s]
     83%|########3 | 34.4M/41.5M [10:18<01:51, 66.4kB/s]
     83%|########3 | 34.5M/41.5M [10:19<01:38, 75.0kB/s]
     83%|########3 | 34.5M/41.5M [10:19<01:30, 81.3kB/s]
     83%|########3 | 34.5M/41.5M [10:19<01:25, 85.9kB/s]
     83%|########3 | 34.5M/41.5M [10:19<01:22, 89.3kB/s]
     83%|########3 | 34.5M/41.5M [10:19<01:43, 70.8kB/s]
     83%|########3 | 34.5M/41.5M [10:20<01:21, 89.1kB/s]
     83%|########3 | 34.6M/
 41.5M [10:20<01:19, 91.2kB/s]
     83%|########3 | 34.6M/41.5M [10:20<01:39, 73.1kB/s]
     83%|########3 | 34.6M/41.5M [10:20<01:31, 78.8kB/s]
     83%|########3 | 34.6M/41.5M [10:20<01:26, 83.4kB/s]
     83%|########3 | 34.6M/41.5M [10:21<01:22, 87.0kB/s]
     83%|########3 | 34.6M/41.5M [10:21<01:20, 89.9kB/s]
     84%|########3 | 34.6M/41.5M [10:21<01:23, 85.9kB/s]
     84%|########3 | 34.7M/41.5M [10:21<01:42, 69.9kB/s]
     84%|########3 | 34.7M/41.5M [10:22<01:50, 64.9kB/s]
     84%|########3 | 34.7M/41.5M [10:22<01:44, 68.2kB/s]
     84%|########3 | 34.7M/41.5M [10:22<01:51, 63.7kB/s]
     84%|########3 | 34.7M/41.5M [10:22<01:58, 60.0kB/s]
     84%|########3 | 34.7M/41.5M [10:22<02:04, 57.0kB/s]
     84%|########3 | 34.7M/41.5M [10:23<02:09, 54.8kB/s]
     84%|########3 | 34.7M/41.5M [10:23<02:13, 53.1kB/s]
     84%|########3 | 34.8M/41.5M [10:23<01:47, 65.7kB/s]
     84%|########3 | 34.8M/41.5M [10:23<01:56, 60.8kB/s]
     84%|########3 | 34.8M/41.5M [10:23<02:03, 57.2kB/s
 ]
     84%|########3 | 34.8M/41.5M [10:23<01:41, 69.0kB/s]
     84%|########3 | 34.8M/41.5M [10:24<01:30, 77.4kB/s]
     84%|########3 | 34.8M/41.5M [10:24<01:41, 68.9kB/s]
     84%|########3 | 34.8M/41.5M [10:24<01:30, 77.4kB/s]
     84%|########3 | 34.8M/41.5M [10:24<01:23, 83.3kB/s]
     84%|########4 | 34.9M/41.5M [10:24<01:19, 87.5kB/s]
     84%|########4 | 34.9M/41.5M [10:24<01:16, 90.5kB/s]
     84%|########4 | 34.9M/41.5M [10:25<01:20, 86.3kB/s]
     84%|########4 | 34.9M/41.5M [10:25<01:12, 95.9kB/s]
     84%|########4 | 34.9M/41.5M [10:25<01:23, 82.6kB/s]
     84%|########4 | 34.9M/41.5M [10:25<01:51, 62.0kB/s]
     84%|########4 | 34.9M/41.5M [10:26<01:49, 62.4kB/s]
     84%|########4 | 35.0M/41.5M [10:26<01:55, 59.4kB/s]
     84%|########4 | 35.0M/41.5M [10:26<02:00, 56.9kB/s]
     84%|########4 | 35.0M/41.5M [10:26<02:04, 54.8kB/s]
     84%|########4 | 35.0M/41.5M [10:26<02:08, 53.2kB/s]
     84%|########4 | 35.0M/41.5M [10:27<02:47, 40.7kB/s]
     84%|########4 | 35.0M
 /41.5M [10:27<02:39, 42.6kB/s]
     84%|########4 | 35.0M/41.5M [10:27<02:00, 56.4kB/s]
     84%|########4 | 35.0M/41.5M [10:27<02:04, 54.3kB/s]
     84%|########4 | 35.0M/41.5M [10:27<02:08, 52.8kB/s]
     84%|########4 | 35.0M/41.5M [10:28<02:11, 51.6kB/s]
     84%|########4 | 35.0M/41.5M [10:28<01:44, 64.7kB/s]
     84%|########4 | 35.1M/41.5M [10:28<01:52, 60.1kB/s]
     85%|########4 | 35.1M/41.5M [10:28<01:58, 56.7kB/s]
     85%|########4 | 35.1M/41.5M [10:28<02:03, 54.3kB/s]
     85%|########4 | 35.1M/41.5M [10:28<01:40, 67.1kB/s]
     85%|########4 | 35.1M/41.5M [10:29<02:21, 47.4kB/s]
     85%|########4 | 35.1M/41.5M [10:29<01:33, 71.7kB/s]
     85%|########4 | 35.1M/41.5M [10:29<02:08, 52.1kB/s]
     85%|########4 | 35.1M/41.5M [10:29<01:46, 62.3kB/s]
     85%|########4 | 35.2M/41.5M [10:30<01:39, 66.6kB/s]
     85%|########4 | 35.2M/41.5M [10:30<01:39, 66.5kB/s]
     85%|########4 | 35.2M/41.5M [10:30<01:47, 61.6kB/s]
     85%|########4 | 35.2M/41.5M [10:30<01:54, 58.0kB/
 s]
     85%|########4 | 35.2M/41.5M [10:30<01:35, 69.3kB/s]
     85%|########4 | 35.2M/41.5M [10:30<01:44, 63.3kB/s]
     85%|########4 | 35.2M/41.5M [10:31<01:29, 73.2kB/s]
     85%|########4 | 35.2M/41.5M [10:31<01:21, 80.3kB/s]
     85%|########4 | 35.2M/41.5M [10:31<01:32, 70.9kB/s]
     85%|########4 | 35.3M/41.5M [10:31<01:22, 78.8kB/s]
     85%|########4 | 35.3M/41.5M [10:31<01:40, 64.8kB/s]
     85%|########5 | 35.3M/41.5M [10:31<01:08, 94.6kB/s]
     85%|########5 | 35.3M/41.5M [10:32<01:19, 82.0kB/s]
     85%|########5 | 35.3M/41.5M [10:32<01:15, 86.2kB/s]
     85%|########5 | 35.3M/41.5M [10:32<01:12, 89.3kB/s]
     85%|########5 | 35.4M/41.5M [10:32<01:17, 83.3kB/s]
     85%|########5 | 35.4M/41.5M [10:32<01:13, 87.0kB/s]
     85%|########5 | 35.4M/41.5M [10:33<02:17, 46.6kB/s]
     85%|########5 | 35.4M/41.5M [10:33<01:34, 67.2kB/s]
     85%|########5 | 35.4M/41.5M [10:34<01:33, 67.8kB/s]
     85%|########5 | 35.4M/41.5M [10:34<01:40, 62.8kB/s]
     85%|########5 | 35.5
 M/41.5M [10:34<01:42, 61.8kB/s]
     85%|########5 | 35.5M/41.5M [10:34<01:46, 59.2kB/s]
     86%|########5 | 35.5M/41.5M [10:35<01:50, 56.9kB/s]
     86%|########5 | 35.5M/41.5M [10:35<01:54, 55.0kB/s]
     86%|########5 | 35.5M/41.5M [10:35<01:57, 53.4kB/s]
     86%|########5 | 35.5M/41.5M [10:35<02:00, 52.1kB/s]
     86%|########5 | 35.5M/41.5M [10:35<02:02, 51.2kB/s]
     86%|########5 | 35.5M/41.5M [10:35<02:04, 50.4kB/s]
     86%|########5 | 35.5M/41.5M [10:36<01:37, 63.9kB/s]
     86%|########5 | 35.5M/41.5M [10:36<01:44, 59.5kB/s]
     86%|########5 | 35.6M/41.5M [10:36<01:28, 70.5kB/s]
     86%|########5 | 35.6M/41.5M [10:36<01:36, 64.1kB/s]
     86%|########5 | 35.6M/41.5M [10:36<01:23, 73.9kB/s]
     86%|########5 | 35.6M/41.5M [10:36<01:33, 66.4kB/s]
     86%|########5 | 35.6M/41.5M [10:37<01:21, 75.6kB/s]
     86%|########5 | 35.6M/41.5M [10:37<01:14, 82.1kB/s]
     86%|########5 | 35.6M/41.5M [10:37<01:10, 86.7kB/s]
     86%|########5 | 35.6M/41.5M [10:37<01:28, 69.1kB
 /s]
     86%|########5 | 35.7M/41.5M [10:37<01:08, 89.0kB/s]
     86%|########6 | 35.7M/41.5M [10:38<00:55, 109kB/s] 
     86%|########6 | 35.7M/41.5M [10:38<01:12, 83.5kB/s]
     86%|########6 | 35.7M/41.5M [10:38<01:09, 86.7kB/s]
     86%|########6 | 35.8M/41.5M [10:38<01:07, 89.3kB/s]
     86%|########6 | 35.8M/41.5M [10:39<01:22, 72.5kB/s]
     86%|########6 | 35.8M/41.5M [10:39<01:16, 78.1kB/s]
     86%|########6 | 35.8M/41.5M [10:39<01:16, 77.8kB/s]
     86%|########6 | 35.8M/41.5M [10:39<01:12, 82.6kB/s]
     86%|########6 | 35.8M/41.5M [10:39<01:21, 72.5kB/s]
     86%|########6 | 35.8M/41.5M [10:40<01:15, 78.4kB/s]
     86%|########6 | 35.9M/41.5M [10:40<01:10, 83.2kB/s]
     86%|########6 | 35.9M/41.5M [10:40<01:07, 87.0kB/s]
     87%|########6 | 35.9M/41.5M [10:40<01:28, 66.5kB/s]
     87%|########6 | 35.9M/41.5M [10:40<01:14, 78.2kB/s]
     87%|########6 | 35.9M/41.5M [10:41<01:28, 66.2kB/s]
     87%|########6 | 35.9M/41.5M [10:41<01:33, 62.2kB/s]
     87%|########6 | 35.
 9M/41.5M [10:41<01:36, 60.6kB/s]
     87%|########6 | 35.9M/41.5M [10:41<01:41, 57.5kB/s]
     87%|########6 | 36.0M/41.5M [10:41<01:48, 53.4kB/s]
     87%|########6 | 36.0M/41.5M [10:42<01:51, 52.1kB/s]
     87%|########6 | 36.0M/41.5M [10:42<01:53, 51.1kB/s]
     87%|########6 | 36.0M/41.5M [10:42<01:54, 50.4kB/s]
     87%|########6 | 36.0M/41.5M [10:42<01:55, 49.9kB/s]
     87%|########6 | 36.0M/41.5M [10:42<01:56, 49.5kB/s]
     87%|########6 | 36.0M/41.5M [10:42<01:56, 49.3kB/s]
     87%|########6 | 36.0M/41.5M [10:43<01:57, 49.1kB/s]
     87%|########6 | 36.0M/41.5M [10:43<01:57, 49.0kB/s]
     87%|########6 | 36.0M/41.5M [10:43<01:57, 48.9kB/s]
     87%|########6 | 36.0M/41.5M [10:43<01:57, 48.8kB/s]
     87%|########6 | 36.0M/41.5M [10:43<01:30, 63.3kB/s]
     87%|########6 | 36.1M/41.5M [10:43<01:36, 58.9kB/s]
     87%|########6 | 36.1M/41.5M [10:44<01:20, 70.4kB/s]
     87%|########6 | 36.1M/41.5M [10:44<01:28, 63.9kB/s]
     87%|########6 | 36.1M/41.5M [10:44<01:35, 59.3k
 B/s]
     87%|########7 | 36.1M/41.5M [10:44<01:06, 85.3kB/s]
     87%|########7 | 36.1M/41.5M [10:45<01:22, 68.4kB/s]
     87%|########7 | 36.1M/41.5M [10:45<01:14, 75.5kB/s]
     87%|########7 | 36.2M/41.5M [10:45<01:08, 81.2kB/s]
     87%|########7 | 36.2M/41.5M [10:45<01:05, 85.6kB/s]
     87%|########7 | 36.2M/41.5M [10:45<01:02, 88.9kB/s]
     87%|########7 | 36.2M/41.5M [10:45<01:05, 85.2kB/s]
     87%|########7 | 36.2M/41.5M [10:46<01:15, 73.4kB/s]
     87%|########7 | 36.2M/41.5M [10:46<01:09, 79.3kB/s]
     87%|########7 | 36.2M/41.5M [10:46<01:05, 84.0kB/s]
     87%|########7 | 36.3M/41.5M [10:46<01:02, 87.6kB/s]
     87%|########7 | 36.3M/41.5M [10:46<01:08, 79.5kB/s]
     87%|########7 | 36.3M/41.5M [10:47<01:04, 84.1kB/s]
     88%|########7 | 36.3M/41.5M [10:47<01:06, 82.0kB/s]
     88%|########7 | 36.3M/41.5M [10:47<01:02, 86.1kB/s]
     88%|########7 | 36.3M/41.5M [10:47<01:00, 89.2kB/s]
     88%|########7 | 36.4M/41.5M [10:47<00:54, 98.5kB/s]
     88%|########7 | 36
 .4M/41.5M [10:47<00:58, 91.2kB/s]
     88%|########7 | 36.4M/41.5M [10:48<00:57, 92.9kB/s]
     88%|########7 | 36.4M/41.5M [10:48<00:52, 102kB/s] 
     88%|########7 | 36.4M/41.5M [10:48<00:52, 100kB/s]
     88%|########7 | 36.4M/41.5M [10:48<00:53, 99.5kB/s]
     88%|########7 | 36.5M/41.5M [10:48<00:49, 106kB/s] 
     88%|########7 | 36.5M/41.5M [10:48<00:46, 113kB/s]
     88%|########7 | 36.5M/41.5M [10:49<00:44, 117kB/s]
     88%|########7 | 36.5M/41.5M [10:49<00:47, 111kB/s]
     88%|########8 | 36.5M/41.5M [10:49<00:53, 98.1kB/s]
     88%|########8 | 36.5M/41.5M [10:49<00:46, 113kB/s] 
     88%|########8 | 36.6M/41.5M [10:49<00:44, 117kB/s]
     88%|########8 | 36.6M/41.5M [10:49<00:46, 110kB/s]
     88%|########8 | 36.6M/41.5M [10:50<00:48, 106kB/s]
     88%|########8 | 36.6M/41.5M [10:50<00:43, 119kB/s]
     88%|########8 | 36.6M/41.5M [10:50<00:41, 122kB/s]
     88%|########8 | 36.6M/41.5M [10:50<00:51, 98.0kB/s]
     88%|########8 | 36.7M/41.5M [10:50<00:48, 105kB/s] 
   
   88%|########8 | 36.7M/41.5M [10:50<00:52, 95.7kB/s]
     88%|########8 | 36.7M/41.5M [10:51<01:00, 83.8kB/s]
     88%|########8 | 36.7M/41.5M [10:51<01:04, 77.3kB/s]
     89%|########8 | 36.7M/41.5M [10:51<01:15, 65.8kB/s]
     89%|########8 | 36.7M/41.5M [10:51<01:12, 69.0kB/s]
     89%|########8 | 36.7M/41.5M [10:52<01:17, 64.3kB/s]
     89%|########8 | 36.8M/41.5M [10:52<01:17, 64.4kB/s]
     89%|########8 | 36.8M/41.5M [10:52<01:22, 60.1kB/s]
     89%|########8 | 36.8M/41.5M [10:52<01:27, 56.8kB/s]
     89%|########8 | 36.8M/41.5M [10:52<01:12, 68.4kB/s]
     89%|########8 | 36.8M/41.5M [10:52<01:18, 62.7kB/s]
     89%|########8 | 36.8M/41.5M [10:53<01:07, 72.8kB/s]
     89%|########8 | 36.8M/41.5M [10:53<01:14, 65.7kB/s]
     89%|########8 | 36.8M/41.5M [10:53<01:26, 56.5kB/s]
     89%|########8 | 36.8M/41.5M [10:53<01:36, 50.8kB/s]
     89%|########8 | 36.9M/41.5M [10:53<01:08, 71.0kB/s]
     89%|########8 | 36.9M/41.5M [10:54<01:14, 65.1kB/s]
     89%|########8 | 36.9M/41.5
 M [10:54<01:09, 69.9kB/s]
     89%|########8 | 36.9M/41.5M [10:54<01:11, 68.0kB/s]
     89%|########8 | 36.9M/41.5M [10:54<01:06, 72.1kB/s]
     89%|########8 | 36.9M/41.5M [10:54<01:08, 69.7kB/s]
     89%|########8 | 36.9M/41.5M [10:54<01:05, 73.3kB/s]
     89%|########9 | 36.9M/41.5M [10:55<01:07, 70.5kB/s]
     89%|########9 | 36.9M/41.5M [10:55<01:00, 78.7kB/s]
     89%|########9 | 37.0M/41.5M [10:55<00:56, 84.3kB/s]
     89%|########9 | 37.0M/41.5M [10:55<01:13, 64.6kB/s]
     89%|########9 | 37.0M/41.5M [10:55<00:53, 88.5kB/s]
     89%|########9 | 37.0M/41.5M [10:56<00:51, 90.8kB/s]
     89%|########9 | 37.0M/41.5M [10:56<00:50, 92.6kB/s]
     89%|########9 | 37.0M/41.5M [10:56<00:49, 93.9kB/s]
     89%|########9 | 37.1M/41.5M [10:56<00:48, 94.9kB/s]
     89%|########9 | 37.1M/41.5M [10:56<00:48, 95.6kB/s]
     89%|########9 | 37.1M/41.5M [10:56<00:47, 96.1kB/s]
     89%|########9 | 37.1M/41.5M [10:57<00:47, 96.5kB/s]
     89%|########9 | 37.1M/41.5M [10:57<00:47, 96.7kB/s]
  
    90%|########9 | 37.1M/41.5M [10:57<00:55, 81.9kB/s]
     90%|########9 | 37.2M/41.5M [10:57<00:50, 89.9kB/s]
     90%|########9 | 37.2M/41.5M [10:57<00:43, 105kB/s] 
     90%|########9 | 37.2M/41.5M [10:58<00:43, 103kB/s]
     90%|########9 | 37.2M/41.5M [10:58<00:44, 101kB/s]
     90%|########9 | 37.2M/41.5M [10:58<00:44, 100kB/s]
     90%|########9 | 37.2M/41.5M [10:58<00:57, 76.9kB/s]
     90%|########9 | 37.3M/41.5M [10:59<00:58, 75.3kB/s]
     90%|########9 | 37.3M/41.5M [10:59<00:52, 83.7kB/s]
     90%|########9 | 37.3M/41.5M [10:59<00:52, 83.0kB/s]
     90%|########9 | 37.3M/41.5M [10:59<00:55, 79.4kB/s]
     90%|########9 | 37.3M/41.5M [10:59<01:00, 72.4kB/s]
     90%|######### | 37.3M/41.5M [11:00<01:02, 70.1kB/s]
     90%|######### | 37.4M/41.5M [11:00<00:56, 77.1kB/s]
     90%|######### | 37.4M/41.5M [11:00<00:52, 82.6kB/s]
     90%|######### | 37.4M/41.5M [11:00<01:06, 65.0kB/s]
     90%|######### | 37.4M/41.5M [11:00<00:48, 87.4kB/s]
     90%|######### | 37.4M/41.5M 
 [11:01<01:11, 59.5kB/s]
     90%|######### | 37.4M/41.5M [11:01<01:03, 66.8kB/s]
     90%|######### | 37.5M/41.5M [11:01<00:57, 73.4kB/s]
     90%|######### | 37.5M/41.5M [11:02<01:29, 47.2kB/s]
     90%|######### | 37.5M/41.5M [11:02<01:28, 47.5kB/s]
     90%|######### | 37.5M/41.5M [11:02<01:44, 39.9kB/s]
     90%|######### | 37.5M/41.5M [11:03<01:40, 41.6kB/s]
     90%|######### | 37.5M/41.5M [11:03<01:36, 43.1kB/s]
     90%|######### | 37.5M/41.5M [11:03<01:33, 44.4kB/s]
     90%|######### | 37.5M/41.5M [11:03<01:54, 36.4kB/s]
     90%|######### | 37.5M/41.5M [11:03<01:46, 39.1kB/s]
     90%|######### | 37.5M/41.5M [11:04<01:40, 41.4kB/s]
     90%|######### | 37.5M/41.5M [11:04<01:35, 43.2kB/s]
     91%|######### | 37.6M/41.5M [11:04<01:32, 44.7kB/s]
     91%|######### | 37.6M/41.5M [11:04<01:29, 45.8kB/s]
     91%|######### | 37.6M/41.5M [11:04<01:28, 46.6kB/s]
     91%|######### | 37.6M/41.5M [11:04<01:26, 47.2kB/s]
     91%|######### | 37.6M/41.5M [11:05<01:06, 61.8kB/s]
    
  91%|######### | 37.6M/41.5M [11:05<01:31, 44.8kB/s]
     91%|######### | 37.6M/41.5M [11:05<01:10, 57.7kB/s]
     91%|######### | 37.6M/41.5M [11:05<01:13, 55.3kB/s]
     91%|######### | 37.6M/41.5M [11:05<01:15, 53.5kB/s]
     91%|######### | 37.6M/41.5M [11:06<01:01, 65.7kB/s]
     91%|######### | 37.7M/41.5M [11:06<01:06, 60.9kB/s]
     91%|######### | 37.7M/41.5M [11:06<00:56, 71.4kB/s]
     91%|######### | 37.7M/41.5M [11:06<01:01, 64.7kB/s]
     91%|######### | 37.7M/41.5M [11:06<01:06, 60.0kB/s]
     91%|######### | 37.7M/41.5M [11:07<01:31, 43.7kB/s]
     91%|######### | 37.7M/41.5M [11:07<00:57, 68.7kB/s]
     91%|######### | 37.7M/41.5M [11:07<01:08, 57.6kB/s]
     91%|######### | 37.8M/41.5M [11:07<00:56, 69.9kB/s]
     91%|#########1| 37.8M/41.5M [11:08<01:03, 61.5kB/s]
     91%|#########1| 37.8M/41.5M [11:08<01:15, 51.7kB/s]
     91%|#########1| 37.8M/41.5M [11:08<01:12, 53.3kB/s]
     91%|#########1| 37.8M/41.5M [11:09<01:13, 52.4kB/s]
     91%|#########1| 37.8M/41.5M
  [11:09<01:14, 51.6kB/s]
     91%|#########1| 37.8M/41.5M [11:09<01:01, 62.3kB/s]
     91%|#########1| 37.8M/41.5M [11:09<01:05, 58.9kB/s]
     91%|#########1| 37.9M/41.5M [11:09<00:59, 63.7kB/s]
     91%|#########1| 37.9M/41.5M [11:09<00:58, 65.2kB/s]
     91%|#########1| 37.9M/41.5M [11:10<01:02, 60.5kB/s]
     91%|#########1| 37.9M/41.5M [11:10<01:13, 51.4kB/s]
     91%|#########1| 37.9M/41.5M [11:10<00:56, 67.0kB/s]
     91%|#########1| 37.9M/41.5M [11:10<01:07, 55.6kB/s]
     91%|#########1| 37.9M/41.5M [11:11<00:57, 64.7kB/s]
     91%|#########1| 37.9M/41.5M [11:11<01:01, 60.9kB/s]
     91%|#########1| 37.9M/41.5M [11:11<01:04, 57.8kB/s]
     91%|#########1| 38.0M/41.5M [11:11<01:06, 55.4kB/s]
     91%|#########1| 38.0M/41.5M [11:11<01:21, 45.5kB/s]
     92%|#########1| 38.0M/41.5M [11:12<01:08, 53.6kB/s]
     92%|#########1| 38.0M/41.5M [11:12<01:10, 52.3kB/s]
     92%|#########1| 38.0M/41.5M [11:12<01:11, 51.4kB/s]
     92%|#########1| 38.0M/41.5M [11:12<01:31, 39.8kB/s]
   
   92%|#########1| 38.0M/41.5M [11:12<01:08, 53.3kB/s]
     92%|#########1| 38.0M/41.5M [11:13<01:27, 41.6kB/s]
     92%|#########1| 38.0M/41.5M [11:13<01:06, 54.1kB/s]
     92%|#########1| 38.0M/41.5M [11:13<01:41, 35.6kB/s]
     92%|#########1| 38.1M/41.5M [11:14<01:34, 38.1kB/s]
     92%|#########1| 38.1M/41.5M [11:14<01:29, 40.3kB/s]
     92%|#########1| 38.1M/41.5M [11:14<01:44, 34.3kB/s]
     92%|#########1| 38.1M/41.5M [11:14<01:35, 37.3kB/s]
     92%|#########1| 38.1M/41.5M [11:15<02:10, 27.3kB/s]
     92%|#########1| 38.1M/41.5M [11:15<01:28, 40.2kB/s]
     92%|#########1| 38.1M/41.5M [11:15<01:24, 42.0kB/s]
     92%|#########1| 38.1M/41.5M [11:15<01:21, 43.5kB/s]
     92%|#########1| 38.1M/41.5M [11:16<01:57, 30.0kB/s]
     92%|#########1| 38.1M/41.5M [11:16<01:21, 42.8kB/s]
     92%|#########1| 38.1M/41.5M [11:16<01:19, 44.0kB/s]
     92%|#########1| 38.2M/41.5M [11:16<01:17, 45.1kB/s]
     92%|#########1| 38.2M/41.5M [11:17<01:53, 30.6kB/s]
     92%|#########2| 38.2M/41.5
 M [11:17<01:19, 43.4kB/s]
     92%|#########2| 38.2M/41.5M [11:17<01:17, 44.6kB/s]
     92%|#########2| 38.2M/41.5M [11:17<01:15, 45.5kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:14, 46.3kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:13, 46.9kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:12, 47.4kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:11, 47.8kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:11, 48.0kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:10, 48.2kB/s]
     92%|#########2| 38.2M/41.5M [11:18<01:02, 54.1kB/s]
     92%|#########2| 38.3M/41.5M [11:19<00:55, 60.8kB/s]
     92%|#########2| 38.3M/41.5M [11:19<00:52, 64.1kB/s]
     92%|#########2| 38.3M/41.5M [11:19<00:50, 66.9kB/s]
     92%|#########2| 38.3M/41.5M [11:19<00:48, 68.9kB/s]
     92%|#########2| 38.3M/41.5M [11:19<00:42, 77.8kB/s]
     92%|#########2| 38.3M/41.5M [11:20<00:56, 58.9kB/s]
     92%|#########2| 38.3M/41.5M [11:20<00:48, 68.0kB/s]
     92%|#########2| 38.4M/41.5M [11:20<00:39, 82.9kB/s]
  
    92%|#########2| 38.4M/41.5M [11:20<00:41, 79.0kB/s]
     93%|#########2| 38.4M/41.5M [11:20<00:38, 84.0kB/s]
     93%|#########2| 38.4M/41.5M [11:21<00:36, 87.7kB/s]
     93%|#########2| 38.4M/41.5M [11:21<00:35, 90.4kB/s]
     93%|#########2| 38.4M/41.5M [11:21<00:37, 86.0kB/s]
     93%|#########2| 38.5M/41.5M [11:21<00:35, 89.1kB/s]
     93%|#########2| 38.5M/41.5M [11:21<00:41, 75.6kB/s]
     93%|#########2| 38.5M/41.5M [11:22<00:35, 87.4kB/s]
     93%|#########2| 38.5M/41.5M [11:22<00:34, 89.9kB/s]
     93%|#########2| 38.5M/41.5M [11:22<00:33, 91.9kB/s]
     93%|#########2| 38.5M/41.5M [11:22<00:33, 93.4kB/s]
     93%|#########2| 38.6M/41.5M [11:22<00:32, 94.5kB/s]
     93%|#########2| 38.6M/41.5M [11:22<00:32, 95.3kB/s]
     93%|#########3| 38.6M/41.5M [11:23<00:31, 95.9kB/s]
     93%|#########3| 38.6M/41.5M [11:23<00:31, 96.3kB/s]
     93%|#########3| 38.6M/41.5M [11:23<00:40, 74.6kB/s]
     93%|#########3| 38.6M/41.5M [11:23<00:32, 92.1kB/s]
     93%|#########3| 38.7M/41.
 5M [11:24<00:33, 87.5kB/s]
     93%|#########3| 38.7M/41.5M [11:24<00:32, 90.0kB/s]
     93%|#########3| 38.7M/41.5M [11:24<00:31, 92.2kB/s]
     93%|#########3| 38.7M/41.5M [11:24<00:31, 93.6kB/s]
     93%|#########3| 38.7M/41.5M [11:24<00:30, 94.7kB/s]
     93%|#########3| 38.7M/41.5M [11:24<00:30, 95.4kB/s]
     93%|#########3| 38.8M/41.5M [11:25<00:38, 74.2kB/s]
     93%|#########3| 38.8M/41.5M [11:25<00:42, 67.5kB/s]
     93%|#########3| 38.8M/41.5M [11:25<00:35, 80.6kB/s]
     94%|#########3| 38.8M/41.5M [11:26<00:41, 68.5kB/s]
     94%|#########3| 38.8M/41.5M [11:26<00:43, 64.1kB/s]
     94%|#########3| 38.8M/41.5M [11:26<00:43, 64.4kB/s]
     94%|#########3| 38.8M/41.5M [11:26<00:40, 68.4kB/s]
     94%|#########3| 38.8M/41.5M [11:26<00:43, 63.3kB/s]
     94%|#########3| 38.9M/41.5M [11:26<00:38, 72.4kB/s]
     94%|#########3| 38.9M/41.5M [11:26<00:38, 71.0kB/s]
     94%|#########3| 38.9M/41.5M [11:27<00:37, 73.2kB/s]
     94%|#########3| 38.9M/41.5M [11:27<00:33, 80.0kB/s]
 
     94%|#########3| 38.9M/41.5M [11:27<00:35, 76.8kB/s]
     94%|#########3| 38.9M/41.5M [11:27<00:32, 83.1kB/s]
     94%|#########3| 38.9M/41.5M [11:27<00:28, 94.8kB/s]
     94%|#########3| 39.0M/41.5M [11:28<00:35, 74.2kB/s]
     94%|#########3| 39.0M/41.5M [11:28<00:28, 91.6kB/s]
     94%|#########3| 39.0M/41.5M [11:28<00:28, 92.9kB/s]
     94%|#########4| 39.0M/41.5M [11:28<00:27, 94.1kB/s]
     94%|#########4| 39.0M/41.5M [11:28<00:27, 95.0kB/s]
     94%|#########4| 39.0M/41.5M [11:29<00:26, 95.7kB/s]
     94%|#########4| 39.1M/41.5M [11:29<00:36, 70.6kB/s]
     94%|#########4| 39.1M/41.5M [11:29<00:28, 88.1kB/s]
     94%|#########4| 39.1M/41.5M [11:30<00:53, 47.0kB/s]
     94%|#########4| 39.1M/41.5M [11:30<00:30, 80.1kB/s]
     94%|#########4| 39.2M/41.5M [11:30<00:34, 70.2kB/s]
     94%|#########4| 39.2M/41.5M [11:31<00:32, 75.2kB/s]
     94%|#########4| 39.2M/41.5M [11:31<00:30, 79.8kB/s]
     94%|#########4| 39.2M/41.5M [11:31<00:28, 83.8kB/s]
     95%|#########4| 39.2M/41
 .5M [11:31<00:30, 77.0kB/s]
     95%|#########4| 39.2M/41.5M [11:31<00:32, 73.6kB/s]
     95%|#########4| 39.2M/41.5M [11:32<00:31, 74.8kB/s]
     95%|#########4| 39.3M/41.5M [11:32<00:31, 75.2kB/s]
     95%|#########4| 39.3M/41.5M [11:32<00:31, 73.7kB/s]
     95%|#########4| 39.3M/41.5M [11:32<00:32, 71.9kB/s]
     95%|#########4| 39.3M/41.5M [11:32<00:41, 56.3kB/s]
     95%|#########4| 39.3M/41.5M [11:33<00:30, 74.3kB/s]
     95%|#########4| 39.3M/41.5M [11:33<00:28, 80.2kB/s]
     95%|#########4| 39.3M/41.5M [11:33<00:29, 76.0kB/s]
     95%|#########4| 39.4M/41.5M [11:33<00:27, 81.5kB/s]
     95%|#########4| 39.4M/41.5M [11:33<00:29, 74.4kB/s]
     95%|#########4| 39.4M/41.5M [11:34<00:27, 80.1kB/s]
     95%|#########4| 39.4M/41.5M [11:34<00:54, 40.1kB/s]
     95%|#########5| 39.4M/41.5M [11:35<00:29, 73.0kB/s]
     95%|#########5| 39.5M/41.5M [11:35<00:32, 65.5kB/s]
     95%|#########5| 39.5M/41.5M [11:35<00:29, 71.2kB/s]
     95%|#########5| 39.5M/41.5M [11:35<00:32, 63.6kB/s]
      95%|#########5| 39.5M/41.5M [11:36<00:52, 39.9kB/s]
     95%|#########5| 39.5M/41.5M [11:36<00:38, 53.4kB/s]
     95%|#########5| 39.5M/41.5M [11:37<00:37, 54.5kB/s]
     95%|#########5| 39.6M/41.5M [11:37<00:34, 58.6kB/s]
     95%|#########5| 39.6M/41.5M [11:37<00:35, 56.8kB/s]
     95%|#########5| 39.6M/41.5M [11:37<00:33, 59.0kB/s]
     95%|#########5| 39.6M/41.5M [11:37<00:35, 56.5kB/s]
     95%|#########5| 39.6M/41.5M [11:38<00:32, 61.8kB/s]
     95%|#########5| 39.6M/41.5M [11:38<00:33, 58.4kB/s]
     96%|#########5| 39.6M/41.5M [11:38<00:28, 68.8kB/s]
     96%|#########5| 39.6M/41.5M [11:38<00:25, 76.7kB/s]
     96%|#########5| 39.6M/41.5M [11:38<00:28, 68.7kB/s]
     96%|#########5| 39.7M/41.5M [11:38<00:24, 77.0kB/s]
     96%|#########5| 39.7M/41.5M [11:39<00:27, 68.7kB/s]
     96%|#########5| 39.7M/41.5M [11:39<00:24, 77.1kB/s]
     96%|#########5| 39.7M/41.5M [11:39<00:22, 83.1kB/s]
     96%|#########5| 39.7M/41.5M [11:39<00:27, 67.2kB/s]
     96%|#########5| 39.7M/4
 1.5M [11:39<00:24, 74.6kB/s]
     96%|#########5| 39.7M/41.5M [11:40<00:26, 67.9kB/s]
     96%|#########5| 39.8M/41.5M [11:40<00:30, 58.8kB/s]
     96%|#########5| 39.8M/41.5M [11:40<00:24, 73.7kB/s]
     96%|#########5| 39.8M/41.5M [11:40<00:42, 42.2kB/s]
     96%|#########5| 39.8M/41.5M [11:41<00:33, 53.6kB/s]
     96%|#########5| 39.8M/41.5M [11:41<00:33, 52.4kB/s]
     96%|#########5| 39.8M/41.5M [11:41<00:34, 51.5kB/s]
     96%|#########5| 39.8M/41.5M [11:41<00:27, 63.4kB/s]
     96%|#########5| 39.8M/41.5M [11:41<00:37, 46.7kB/s]
     96%|#########6| 39.8M/41.5M [11:42<00:36, 47.5kB/s]
     96%|#########6| 39.9M/41.5M [11:42<00:35, 47.7kB/s]
     96%|#########6| 39.9M/41.5M [11:42<00:35, 48.1kB/s]
     96%|#########6| 39.9M/41.5M [11:43<00:30, 54.4kB/s]
     96%|#########6| 39.9M/41.5M [11:43<00:29, 57.7kB/s]
     96%|#########6| 39.9M/41.5M [11:43<00:30, 55.4kB/s]
     96%|#########6| 39.9M/41.5M [11:43<00:30, 53.6kB/s]
     96%|#########6| 39.9M/41.5M [11:43<00:25, 65.6kB/s]
 
     96%|#########6| 39.9M/41.5M [11:43<00:26, 60.8kB/s]
     96%|#########6| 39.9M/41.5M [11:43<00:22, 71.3kB/s]
     96%|#########6| 40.0M/41.5M [11:44<00:24, 64.7kB/s]
     96%|#########6| 40.0M/41.5M [11:44<00:26, 60.0kB/s]
     96%|#########6| 40.0M/41.5M [11:44<00:22, 71.0kB/s]
     96%|#########6| 40.0M/41.5M [11:44<00:31, 49.6kB/s]
     96%|#########6| 40.0M/41.5M [11:45<00:22, 69.2kB/s]
     96%|#########6| 40.0M/41.5M [11:45<00:20, 76.2kB/s]
     96%|#########6| 40.0M/41.5M [11:45<00:22, 69.0kB/s]
     97%|#########6| 40.0M/41.5M [11:45<00:19, 76.7kB/s]
     97%|#########6| 40.1M/41.5M [11:45<00:27, 53.8kB/s]
     97%|#########6| 40.1M/41.5M [11:46<00:16, 87.2kB/s]
     97%|#########6| 40.1M/41.5M [11:46<00:20, 71.5kB/s]
     97%|#########6| 40.1M/41.5M [11:46<00:19, 75.3kB/s]
     97%|#########6| 40.1M/41.5M [11:46<00:21, 66.6kB/s]
     97%|#########6| 40.1M/41.5M [11:47<00:19, 71.5kB/s]
     97%|#########6| 40.2M/41.5M [11:47<00:20, 68.0kB/s]
     97%|#########6| 40.2M/
 41.5M [11:47<00:22, 63.0kB/s]
     97%|#########6| 40.2M/41.5M [11:47<00:19, 72.2kB/s]
     97%|#########6| 40.2M/41.5M [11:47<00:26, 51.5kB/s]
     97%|#########6| 40.2M/41.5M [11:48<00:15, 85.3kB/s]
     97%|#########6| 40.2M/41.5M [11:48<00:15, 85.8kB/s]
     97%|#########7| 40.2M/41.5M [11:48<00:17, 72.2kB/s]
     97%|#########7| 40.3M/41.5M [11:48<00:16, 78.1kB/s]
     97%|#########7| 40.3M/41.5M [11:48<00:15, 82.9kB/s]
     97%|#########7| 40.3M/41.5M [11:49<00:14, 86.6kB/s]
     97%|#########7| 40.3M/41.5M [11:49<00:13, 89.5kB/s]
     97%|#########7| 40.3M/41.5M [11:49<00:14, 83.0kB/s]
     97%|#########7| 40.3M/41.5M [11:49<00:13, 86.8kB/s]
     97%|#########7| 40.4M/41.5M [11:49<00:15, 76.5kB/s]
     97%|#########7| 40.4M/41.5M [11:50<00:14, 81.7kB/s]
     97%|#########7| 40.4M/41.5M [11:50<00:13, 85.7kB/s]
     97%|#########7| 40.4M/41.5M [11:50<00:12, 88.9kB/s]
     97%|#########7| 40.4M/41.5M [11:50<00:12, 91.2kB/s]
     97%|#########7| 40.4M/41.5M [11:50<00:11, 92.9kB/s
 ]
     98%|#########7| 40.5M/41.5M [11:50<00:11, 94.2kB/s]
     98%|#########7| 40.5M/41.5M [11:51<00:11, 95.1kB/s]
     98%|#########7| 40.5M/41.5M [11:51<00:11, 95.8kB/s]
     98%|#########7| 40.5M/41.5M [11:51<00:10, 96.2kB/s]
     98%|#########7| 40.5M/41.5M [11:51<00:09, 108kB/s] 
     98%|#########7| 40.5M/41.5M [11:51<00:10, 93.5kB/s]
     98%|#########7| 40.5M/41.5M [11:51<00:11, 87.8kB/s]
     98%|#########7| 40.6M/41.5M [11:52<00:12, 74.9kB/s]
     98%|#########7| 40.6M/41.5M [11:52<00:14, 66.2kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:20, 47.4kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:18, 51.5kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:18, 50.8kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:18, 50.3kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:18, 49.9kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:18, 49.5kB/s]
     98%|#########7| 40.6M/41.5M [11:53<00:18, 49.3kB/s]
     98%|#########7| 40.6M/41.5M [11:54<00:18, 49.1kB/s]
     98%|#########7| 40.6M
 /41.5M [11:54<00:23, 37.9kB/s]
     98%|#########8| 40.7M/41.5M [11:54<00:24, 35.2kB/s]
     98%|#########8| 40.7M/41.5M [11:55<00:24, 34.1kB/s]
     98%|#########8| 40.7M/41.5M [11:55<00:26, 31.3kB/s]
     98%|#########8| 40.7M/41.5M [11:55<00:24, 34.2kB/s]
     98%|#########8| 40.7M/41.5M [11:56<00:22, 37.0kB/s]
     98%|#########8| 40.7M/41.5M [11:56<00:20, 39.5kB/s]
     98%|#########8| 40.7M/41.5M [11:56<00:19, 41.7kB/s]
     98%|#########8| 40.7M/41.5M [11:56<00:18, 43.5kB/s]
     98%|#########8| 40.7M/41.5M [11:56<00:17, 44.8kB/s]
     98%|#########8| 40.7M/41.5M [11:56<00:17, 45.9kB/s]
     98%|#########8| 40.8M/41.5M [11:57<00:12, 60.4kB/s]
     98%|#########8| 40.8M/41.5M [11:57<00:18, 42.0kB/s]
     98%|#########8| 40.8M/41.5M [11:57<00:12, 58.1kB/s]
     98%|#########8| 40.8M/41.5M [11:57<00:13, 55.6kB/s]
     98%|#########8| 40.8M/41.5M [11:57<00:13, 53.8kB/s]
     98%|#########8| 40.8M/41.5M [11:58<00:10, 65.9kB/s]
     98%|#########8| 40.8M/41.5M [11:58<00:11, 61.0kB/
 s]
     98%|#########8| 40.8M/41.5M [11:58<00:09, 71.5kB/s]
     98%|#########8| 40.8M/41.5M [11:58<00:10, 64.8kB/s]
     98%|#########8| 40.9M/41.5M [11:58<00:11, 60.1kB/s]
     98%|#########8| 40.9M/41.5M [11:59<00:09, 71.1kB/s]
     99%|#########8| 40.9M/41.5M [11:59<00:10, 64.4kB/s]
     99%|#########8| 40.9M/41.5M [11:59<00:10, 59.7kB/s]
     99%|#########8| 40.9M/41.5M [11:59<00:08, 70.9kB/s]
     99%|#########8| 40.9M/41.5M [11:59<00:09, 64.3kB/s]
     99%|#########8| 40.9M/41.5M [12:00<00:13, 45.9kB/s]
     99%|#########8| 40.9M/41.5M [12:00<00:06, 82.5kB/s]
     99%|#########8| 41.0M/41.5M [12:00<00:08, 68.5kB/s]
     99%|#########8| 41.0M/41.5M [12:00<00:07, 75.1kB/s]
     99%|#########8| 41.0M/41.5M [12:00<00:06, 80.5kB/s]
     99%|#########8| 41.0M/41.5M [12:01<00:05, 84.9kB/s]
     99%|#########8| 41.0M/41.5M [12:01<00:05, 88.3kB/s]
     99%|#########8| 41.0M/41.5M [12:01<00:05, 90.7kB/s]
     99%|#########8| 41.1M/41.5M [12:01<00:04, 92.6kB/s]
     99%|#########8| 41.1
 M/41.5M [12:01<00:06, 72.9kB/s]
     99%|#########9| 41.1M/41.5M [12:02<00:05, 78.9kB/s]
     99%|#########9| 41.1M/41.5M [12:02<00:04, 83.6kB/s]
     99%|#########9| 41.1M/41.5M [12:02<00:04, 87.3kB/s]
     99%|#########9| 41.1M/41.5M [12:02<00:04, 90.1kB/s]
     99%|#########9| 41.1M/41.5M [12:02<00:04, 71.8kB/s]
     99%|#########9| 41.2M/41.5M [12:03<00:03, 89.6kB/s]
     99%|#########9| 41.2M/41.5M [12:03<00:03, 85.9kB/s]
     99%|#########9| 41.2M/41.5M [12:03<00:03, 83.1kB/s]
     99%|#########9| 41.2M/41.5M [12:03<00:04, 63.0kB/s]
     99%|#########9| 41.2M/41.5M [12:04<00:03, 69.9kB/s]
     99%|#########9| 41.2M/41.5M [12:04<00:04, 62.0kB/s]
     99%|#########9| 41.3M/41.5M [12:04<00:03, 62.7kB/s]
     99%|#########9| 41.3M/41.5M [12:04<00:04, 54.0kB/s]
     99%|#########9| 41.3M/41.5M [12:05<00:04, 52.9kB/s]
    100%|#########9| 41.3M/41.5M [12:05<00:03, 56.6kB/s]
    100%|#########9| 41.3M/41.5M [12:05<00:03, 49.0kB/s]
    100%|#########9| 41.3M/41.5M [12:05<00:02, 62.0kB
 /s]
    100%|#########9| 41.3M/41.5M [12:06<00:02, 59.3kB/s]
    100%|#########9| 41.4M/41.5M [12:06<00:02, 67.9kB/s]
    100%|#########9| 41.4M/41.5M [12:06<00:02, 63.2kB/s]
    100%|#########9| 41.4M/41.5M [12:06<00:02, 59.4kB/s]
    100%|#########9| 41.4M/41.5M [12:06<00:01, 69.6kB/s]
    100%|#########9| 41.4M/41.5M [12:07<00:01, 63.8kB/s]
    100%|#########9| 41.4M/41.5M [12:07<00:01, 73.3kB/s]
    100%|#########9| 41.4M/41.5M [12:07<00:00, 75.1kB/s]
    100%|#########9| 41.4M/41.5M [12:07<00:00, 81.3kB/s]
    100%|#########9| 41.5M/41.5M [12:07<00:00, 66.8kB/s]
    100%|#########9| 41.5M/41.5M [12:08<00:00, 65.9kB/s]
    100%|##########| 41.5M/41.5M [12:08<00:00, 59.7kB/s]
 
 
 
@@ -319,6 +319,11 @@ Look up prediction top 1 index in 1000 class synset.
 
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 12 minutes  34.822 seconds)
+
+
 .. _sphx_glr_download_how_to_compile_models_from_oneflow.py:
 
 .. only:: html
diff --git a/docs/_sources/how_to/compile_models/from_paddle.rst.txt b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
index e48d9c636..527d1ee80 100644
--- a/docs/_sources/how_to/compile_models/from_paddle.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
@@ -235,7 +235,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  6.796 seconds)
+   **Total running time of the script:** ( 1 minutes  7.818 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_paddle.py:
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 62b19c466..57008b30a 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -93,7 +93,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     12%|#1        | 5.29M/44.7M [00:00<00:00, 53.3MB/s]
     23%|##3       | 10.4M/44.7M [00:00<00:00, 51.9MB/s]
     55%|#####5    | 24.8M/44.7M [00:00<00:00, 96.0MB/s]
     98%|#########8| 43.8M/44.7M [00:00<00:00, 136MB/s] 
    100%|##########| 44.7M/44.7M [00:00<00:00, 113MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     37%|###7      | 16.7M/44.7M [00:00<00:00, 175MB/s]
     90%|######### | 40.3M/44.7M [00:00<00:00, 217MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 215MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 2bc4be6a5..316d73793 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -422,7 +422,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  0.020 seconds)
+   **Total running time of the script:** ( 1 minutes  1.473 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 48585c03b..163163054 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:34.325** total execution time for **how_to_compile_models** files:
+**17:26.711** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 01:06.796 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 12:34.822 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:00.020 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 01:07.818 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 00:57.536 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:01.473 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:38.763 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 00:56.905 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:31.710 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.764 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:22.751 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:23.765 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:21.401 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:21.555 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:18.816 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:19.632 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:13.726 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:13.360 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.806 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.616 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 3d8caddac..ec74ff637 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -440,7 +440,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      15.9048      15.8756      16.1621      15.8056       0.0960   
+      15.8016      15.8201      15.8872      15.6881       0.0606   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 502677a31..c0af8f2d4 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -122,7 +122,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
      6%|5         | 9.56M/170M [00:00<00:01, 100MB/s]
     14%|#4        | 24.0M/170M [00:00<00:01, 130MB/s]
     24%|##4       | 41.1M/170M [00:00<00:00, 152MB/s]
     35%|###5      | 59.5M/170M [00:00<00:00, 168MB/s]
     44%|####4     | 75.5M/170M [00:00<00:00, 137MB/s]
     54%|#####4    | 92.1M/170M [00:00<00:00, 148MB/s]
     63%|######2   | 107M/170M [00:00<00:00, 132MB/s] 
     74%|#######3  | 125M/170M [00:00<00:00, 148MB/s]
     84%|########3 | 142M/170M [00:01<00:00, 156MB/s]
     94%|#########4| 160M/170M [00:01<00:00, 165MB/s]
    100%|##########| 170M/170M [00:01<00:00, 153MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      4%|3         | 6.62M/170M [00:00<00:02, 69.4MB/s]
      8%|7         | 13.2M/170M [00:00<00:02, 63.2MB/s]
     11%|#1        | 19.4M/170M [00:00<00:02, 63.4MB/s]
     15%|#4        | 25.4M/170M [00:00<00:02, 56.9MB/s]
     18%|#8        | 31.2M/170M [00:00<00:02, 57.8MB/s]
     22%|##1       | 37.0M/170M [00:00<00:02, 58.5MB/s]
     25%|##5       | 42.6M/170M [00:00<00:02, 50.7MB/s]
     29%|##9       | 49.9M/170M [00:00<00:02, 57.8MB/s]
     34%|###3      | 57.3M/170M [00:01<00:01, 63.4MB/s]
     37%|###7      | 63.6M/170M [00:01<00:01, 56.9MB/s]
     41%|####      | 69.2M/170M [00:01<00:02, 51.8MB/s]
     44%|####3     | 74.4M/170M [00:01<00:02, 49.9MB/s]
     47%|####6     | 79.7M/170M [00:01<00:01, 51.2MB/s]
     51%|#####     | 85.9M/170M [00:01<00:01, 54.9MB/s]
     54%|#####3    | 91.2M/170M [00:01<00:01, 50.9MB/s]
     58%|#####7    | 98.2M/170M [00:01<00:01, 56.8MB/s]
     61%|######1   | 104M/170M [00:01<00:01, 55.4MB/s
 ] 
     64%|######4   | 109M/170M [00:02<00:01, 55.3MB/s]
     68%|######7   | 115M/170M [00:02<00:01, 56.9MB/s]
     71%|#######1  | 121M/170M [00:02<00:00, 56.9MB/s]
     75%|#######4  | 127M/170M [00:02<00:00, 60.6MB/s]
     79%|#######8  | 134M/170M [00:02<00:00, 62.0MB/s]
     82%|########2 | 140M/170M [00:02<00:00, 59.9MB/s]
     86%|########5 | 145M/170M [00:02<00:00, 59.9MB/s]
     89%|########8 | 151M/170M [00:02<00:00, 54.1MB/s]
     92%|#########2| 157M/170M [00:02<00:00, 54.9MB/s]
     95%|#########5| 162M/170M [00:03<00:00, 54.5MB/s]
     98%|#########8| 167M/170M [00:03<00:00, 54.4MB/s]
    100%|##########| 170M/170M [00:03<00:00, 55.6MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -291,7 +291,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  52.128 seconds)
+   **Total running time of the script:** ( 2 minutes  56.304 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index 840d58fa5..2a631572a 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -219,7 +219,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     28%|##7       | 3.74M/13.6M [00:00<00:00, 39.2MB/s]
     87%|########6 | 11.7M/13.6M [00:00<00:00, 65.4MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 65.2MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     19%|#8        | 2.56M/13.6M [00:00<00:00, 26.3MB/s]
     40%|####      | 5.44M/13.6M [00:00<00:00, 28.3MB/s]
     79%|#######8  | 10.7M/13.6M [00:00<00:00, 40.4MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 39.8MB/s]
 
 
 
@@ -399,7 +399,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.5035      90.2568      106.8515     90.0752       1.6751   
+      90.3410      90.2663      92.6286      90.0772       0.2983   
                
 
 
@@ -448,7 +448,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  5.568 seconds)
+   **Total running time of the script:** ( 1 minutes  8.289 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 4cfb4f257..9076fc4bc 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -426,7 +426,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      119.1493     119.0886     124.4715     118.5037      0.5867   
+      120.0122     120.0230     121.7729     119.2951      0.3593   
                
 
 
@@ -463,7 +463,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  51.489 seconds)
+   **Total running time of the script:** ( 1 minutes  57.142 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 447c358b7..efcbc680c 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -254,7 +254,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  11.867 seconds)
+   **Total running time of the script:** ( 1 minutes  16.285 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index fb8a0c79f..77d83e694 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -157,7 +157,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      1%|1         | 1840/132723 [00:00<00:07, 18226.28KB/s]
      4%|4         | 5878/132723 [00:00<00:04, 31205.56KB/s]
     10%|#         | 13346/132723 [00:00<00:02, 51012.43KB/s]
     16%|#5        | 20925/132723 [00:00<00:01, 60778.55KB/s]
     21%|##        | 27822/132723 [00:00<00:01, 63728.89KB/s]
     27%|##7       | 36063/132723 [00:00<00:01, 70074.61KB/s]
     33%|###3      | 44158/132723 [00:00<00:01, 73623.41KB/s]
     39%|###9      | 52205/132723 [00:00<00:01, 75799.52KB/s]
     45%|####5     | 60100/132723 [00:00<00:00, 76781.43KB/s]
     52%|#####1    | 68527/132723 [00:01<00:00, 79090.25KB/s]
     58%|#####7    | 76877/132723 [00:01<00:00, 80436.97KB/s]
     64%|######4   | 85295/132723 [00:01<00:00, 81572.90KB/s]
     71%|#######   | 93709/132723 [00:01<00:00, 82349.25KB/s]
     77%|#######6  | 102105/132723 [00:01<00:00, 82832.24KB/s]
     83%|########3 | 110421/132723 [00:01<00:00, 82928.19KB/s]
     90%|########9 
 | 118877/132723 [00:01<00:00, 83416.53KB/s]
     96%|#########5| 127254/132723 [00:01<00:00, 83521.58KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 75115.95KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5803/132723 [00:00<00:02, 58023.28KB/s]
     10%|#         | 13589/132723 [00:00<00:01, 69689.66KB/s]
     16%|#6        | 21352/132723 [00:00<00:01, 73313.60KB/s]
     22%|##1       | 29112/132723 [00:00<00:01, 74999.72KB/s]
     28%|##7       | 36850/132723 [00:00<00:01, 75853.72KB/s]
     34%|###3      | 44641/132723 [00:00<00:01, 76548.79KB/s]
     40%|###9      | 52429/132723 [00:00<00:01, 76981.76KB/s]
     45%|####5     | 60297/132723 [00:00<00:00, 77520.08KB/s]
     51%|#####1    | 68174/132723 [00:00<00:00, 77908.73KB/s]
     57%|#####7    | 75965/132723 [00:01<00:00, 77781.03KB/s]
     63%|######3   | 83752/132723 [00:01<00:00, 77805.92KB/s]
     69%|######9   | 91690/132723 [00:01<00:00, 78281.72KB/s]
     75%|#######4  | 99519/132723 [00:01<00:00, 64164.07KB/s]
     81%|########  | 107207/132723 [00:01<00:00, 67489.96KB/s]
     87%|########6 | 114897/132723 [00:01<00:00, 70049.02KB/s]
     92%|#########
 2| 122596/132723 [00:01<00:00, 71990.79KB/s]
     98%|#########8| 130397/132723 [00:01<00:00, 73706.87KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 73640.59KB/s]
 
 
 
@@ -240,7 +240,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  16.842 seconds)
+   **Total running time of the script:** ( 2 minutes  20.021 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 39bba8e08..bd9e8420c 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**10:08.121** total execution time for **how_to_deploy_models** files:
+**10:29.287** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 02:52.128 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 02:56.304 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:16.842 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:20.021 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:51.489 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:57.142 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:11.867 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:16.285 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:05.568 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:08.289 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:28.439 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:29.066 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:21.782 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.174 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index aa65536f0..7ce23d21b 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -463,7 +463,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipf52be1a5-c5ea-4cb7-94bf-25a8ca4e0f76 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip58fd880b-e186-4962-8744-f96ce958cceb from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 6e9ad2a1c..6e67fd307 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:38.819** total execution time for **how_to_extend_tvm** files:
+**00:39.972** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:35.676 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:36.854 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.183 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.190 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.953 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.921 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.006 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 1565bd5f4..8e774086f 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -215,10 +215,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6392us [6392us] (45.10%; 45.10%)
-    FoldScaleAxis: 7780us [6us] (54.90%; 54.90%)
-            FoldConstant: 7775us [1554us] (54.86%; 99.93%)
-                    InferType: 6220us [6220us] (43.89%; 80.01%)
+    InferType: 6812us [6812us] (45.70%; 45.70%)
+    FoldScaleAxis: 8094us [6us] (54.30%; 54.30%)
+            FoldConstant: 8089us [1610us] (54.27%; 99.93%)
+                    InferType: 6479us [6479us] (43.47%; 80.10%)
 
 
 
@@ -257,10 +257,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6185us [6185us] (44.58%; 44.58%)
-    FoldScaleAxis: 7687us [5us] (55.42%; 55.42%)
-            FoldConstant: 7682us [1600us] (55.38%; 99.93%)
-                    InferType: 6082us [6082us] (43.84%; 79.17%)
+    InferType: 6490us [6490us] (44.84%; 44.84%)
+    FoldScaleAxis: 7983us [5us] (55.16%; 55.16%)
+            FoldConstant: 7978us [1637us] (55.12%; 99.94%)
+                    InferType: 6341us [6341us] (43.81%; 79.48%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index e74377643..35ad9f97f 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -327,7 +327,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 34.769972 ms
+    Convolution: 38.487063 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index eb3806f1b..406541a4b 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -658,7 +658,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 8.736410 ms
+    conv2d with tensor core: 7.319375 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index 88bfc3cba..7b3bae4b7 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -130,8 +130,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.018055
-    Baseline: 3.286830
+    Numpy running time: 0.019373
+    Baseline: 3.324971
 
 
 
@@ -226,7 +226,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.297638
+    Opt1: 0.299469
 
 
 
@@ -329,7 +329,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.331090
+    Opt2: 0.336075
 
 
 
@@ -425,7 +425,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.116584
+    Opt3: 0.120163
 
 
 
@@ -550,7 +550,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.110862
+    Opt4: 0.111161
 
 
 
@@ -672,7 +672,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.111049
+    Opt5: 0.111211
 
 
 
@@ -797,7 +797,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.144921
+    Opt6: 0.145250
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 786d3fd57..b3dddddf5 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:33.876** total execution time for **how_to_optimize_operators** files:
+**00:34.299** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:31.664 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.047 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.248 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.255 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:00.964 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:00.997 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index b1f1768c6..b3220d5ce 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**05:25.737** total execution time for **how_to_tune_with_autoscheduler** files:
+**05:27.904** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 02:40.053 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 02:39.675 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:19.621 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:21.051 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:42.688 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:42.862 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:26.718 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:27.238 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:08.415 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:08.658 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.242 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.420 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 645827071..ad9f3ce5d 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -239,1388 +239,726 @@ cooperative fetching, unrolling and operator fusion.
                  compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 224;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [216]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [1152]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [4], [], scope="local", align=8)[0] = 0f32
-        conv2d_nchw_1[2] = 0f32
-        conv2d_nchw_1[4] = 0f32
-        conv2d_nchw_1[6] = 0f32
-        conv2d_nchw_1[8] = 0f32
-        conv2d_nchw_1[10] = 0f32
-        conv2d_nchw_1[12] = 0f32
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 32;
+      allocate(conv2d_nchw: Pointer(local float32), float32, [28]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [324]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [576]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [28], [], scope="local", align=64)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
+        conv2d_nchw_1[2] = 0f32
         conv2d_nchw_1[3] = 0f32
+        conv2d_nchw_1[4] = 0f32
         conv2d_nchw_1[5] = 0f32
+        conv2d_nchw_1[6] = 0f32
         conv2d_nchw_1[7] = 0f32
+        conv2d_nchw_1[8] = 0f32
         conv2d_nchw_1[9] = 0f32
+        conv2d_nchw_1[10] = 0f32
         conv2d_nchw_1[11] = 0f32
+        conv2d_nchw_1[12] = 0f32
         conv2d_nchw_1[13] = 0f32
-        for (rc.outer.outer: int32, 0, 64) {
-          let cse_var_1: int32 = (rc.outer.outer*392)
+        conv2d_nchw_1[14] = 0f32
+        conv2d_nchw_1[15] = 0f32
+        conv2d_nchw_1[16] = 0f32
+        conv2d_nchw_1[17] = 0f32
+        conv2d_nchw_1[18] = 0f32
+        conv2d_nchw_1[19] = 0f32
+        conv2d_nchw_1[20] = 0f32
+        conv2d_nchw_1[21] = 0f32
+        conv2d_nchw_1[22] = 0f32
+        conv2d_nchw_1[23] = 0f32
+        conv2d_nchw_1[24] = 0f32
+        conv2d_nchw_1[25] = 0f32
+        conv2d_nchw_1[26] = 0f32
+        conv2d_nchw_1[27] = 0f32
+        for (rc.outer.outer: int32, 0, 128) {
+          let cse_var_1: int32 = (rc.outer.outer*196)
            {
-            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1: Buffer(pad_temp.shared, float32, [216], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else(((1 <= floormod(blockIdx.x, 7)) && (1 <= threadIdx.x_1)), data[(((cse_var_1 + (floormod(blockIdx.x, 7)*7)) + threadIdx.x_1) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 8)] = @tir.if_then_else((((1 <= (floordiv((threadIdx.x_1 + 8), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 8), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 16)] = @tir.if_then_else(((((floordiv((threadIdx.x_1 + 16), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 16), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 24)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 24), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 24), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 24), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 24), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod( [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 32)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1 + 5), 27), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 32), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 5), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 40)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 13), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 40), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 13), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 48)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 21), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 48), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 21), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod( [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 56)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1 + 2), 27), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 56), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 2), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 64)] = @tir.if_then_else((threadIdx.x_1 < 7), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 64), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 10), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + (threadIdx.x_1 + 1)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 72)] = @tir.if_then_else(((floormod(blockIdx.x, 7) < 6) && (1 <= threadIdx.x_1)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 72), 27)*49)) + (floormod(blockIdx.x, 7)*7)) + threadIdx.x_1) + 6)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 80)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 26), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 26), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 80), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 26), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod( [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 88)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 27), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 88), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 7), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 96)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 15), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 96), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 15), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 104)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 23), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 23), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 104), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 23), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormo [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 112)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1 + 4), 27), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 112), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 4), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 120)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 12), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 120), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 12), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 128)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 20), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 20), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 128), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 20), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormo [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 136)] = @tir.if_then_else(((1 <= floormod(blockIdx.x, 7)) && (threadIdx.x_1 < 7)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 136), 27)*49)) + (floormod(blockIdx.x, 7)*7)) + (threadIdx.x_1 + 1)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 144)] = @tir.if_then_else((1 <= threadIdx.x_1), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 144), 27)*49)) + (floormod(blockIdx.x, 7)*7)) + threadIdx.x_1) - 1)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 152)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 17), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 152), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 17), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 160)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 25), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 25), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 160), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 25), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormo [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 168)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1 + 6), 27), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 168), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 6), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 176)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 14), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 176), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 14), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 184)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 22), 27), 9) + floormod(blockIdx.x, 7))) && ((floordiv(floormod((threadIdx.x_1 + 22), 27), 9) + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 184), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 22), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormo [...]
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 192)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1 + 3), 27), 9) + floormod(blockIdx.x, 7))) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 192), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 3), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 200)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 11), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 200), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 11), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            pad_temp.shared_1[(threadIdx.x_1 + 208)] = @tir.if_then_else((((floordiv(floormod((threadIdx.x_1 + 19), 27), 9) + floormod(blockIdx.x, 7)) < 8) && (threadIdx.x_1 < 7)), data[(((((cse_var_1 + (floordiv((threadIdx.x_1 + 208), 27)*49)) + (floordiv(floormod((threadIdx.x_1 + 19), 27), 9)*7)) + (floormod(blockIdx.x, 7)*7)) + (threadIdx.x_1 + 1)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1: Buffer(kernel.shared, float32, [1152], [], scope="shared")[threadIdx.x_2] = kernel[(((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 8)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 16)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 24)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 24)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 32)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 40)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 48)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 48)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 72)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 80)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 88)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 96)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 4632)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 104)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 120)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 4656)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 136)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 4608)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 144)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 152)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 160)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 9240)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 176)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 184)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 9264)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 200)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 208)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 9216)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 216)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 232)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 240)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 13848)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 248)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 264)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 13872)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 272)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 13824)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 288)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 296)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 304)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 312)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 18456)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 328)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 18480)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 344)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 352)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 18432)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 360)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 368)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 376)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 23064)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 400)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 408)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 23088)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 416)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 424)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 23040)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 432)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 440)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 456)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 27672)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 464)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 472)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 480)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 27696)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 488)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 496)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 27648)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 520)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 528)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 32280)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 536)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 544)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 552)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 32304)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 560)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 568)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 32256)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 584)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 592)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 600)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 36888)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 608)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 616)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 624)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 36912)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 632)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 36864)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 648)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 656)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 664)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 41496)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 680)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 688)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 696)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 41520)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 712)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 41472)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 720)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 728)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 736)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 744)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 46104)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 752)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 760)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 46128)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 776)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 46080)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 792)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 800)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 808)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 816)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 50712)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 824)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 840)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 50736)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 848)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 856)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 50688)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 864)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 872)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 880)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 888)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 55320)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 904)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 912)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 55344)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 920)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 928)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 55296)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 936)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 944)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 952)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 59928)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 968)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 976)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 984)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 59952)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 992)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1000)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 59904)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1008)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1016)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1032)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 64536)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1040)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1048)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1056)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 64560)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1064)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1072)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1080)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 69120)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 8), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 69120)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1096)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 16), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 69120)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1104)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 69144)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1112)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 32), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 69120)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 40), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 69120)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1128)] = kernel[((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + threadIdx.x_2) + 69168)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1136)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 56), 3)*3)) + floormod((threadIdx.x_2 + 2), 3)) + 69120)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 8;
-            kernel.shared_1[(threadIdx.x_2 + 1144)] = kernel[(((((floordiv(blockIdx.x, 7)*73728) + (rc.outer.outer*72)) + (floordiv((threadIdx.x_2 + 64), 3)*3)) + floormod((threadIdx.x_2 + 1), 3)) + 69120)]
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*144)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*144) + 9)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*144) + 18)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[81]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[82]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[83]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[84]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[85]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[86]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[87]*kernel.shared_1[((threadIdx.x*144) + 27)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*144) + 72)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*144) + 81)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*144) + 90)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[81]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[82]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[83]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[84]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[85]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[86]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[87]*kernel.shared_1[((threadIdx.x*144) + 99)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*144) + 1)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*144) + 10)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*144) + 19)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[82]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[83]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[84]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[85]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[86]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[87]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[88]*kernel.shared_1[((threadIdx.x*144) + 28)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*144) + 73)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*144) + 82)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*144) + 91)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[82]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[83]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[84]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[85]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[86]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[87]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[88]*kernel.shared_1[((threadIdx.x*144) + 100)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*144) + 2)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*144) + 11)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*144) + 20)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[83]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[84]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[85]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[86]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[87]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[88]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[89]*kernel.shared_1[((threadIdx.x*144) + 29)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*144) + 74)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*144) + 83)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*144) + 92)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[83]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[84]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[85]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[86]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[87]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[88]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[89]*kernel.shared_1[((threadIdx.x*144) + 101)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*144) + 3)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*144) + 12)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*144) + 21)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[90]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[91]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[92]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[93]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[94]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[95]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[96]*kernel.shared_1[((threadIdx.x*144) + 30)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*144) + 75)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*144) + 84)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*144) + 93)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[90]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[91]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[92]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[93]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[94]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[95]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[96]*kernel.shared_1[((threadIdx.x*144) + 102)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*144) + 4)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*144) + 13)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*144) + 22)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[91]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[92]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[93]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[94]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[95]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[96]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[97]*kernel.shared_1[((threadIdx.x*144) + 31)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*144) + 76)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*144) + 85)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*144) + 94)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[91]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[92]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[93]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[94]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[95]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[96]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[97]*kernel.shared_1[((threadIdx.x*144) + 103)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*144) + 5)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*144) + 14)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*144) + 23)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[92]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[93]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[94]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[95]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[96]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[97]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[98]*kernel.shared_1[((threadIdx.x*144) + 32)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*144) + 77)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*144) + 86)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*144) + 95)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[92]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[93]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[94]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[95]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[96]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[97]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[98]*kernel.shared_1[((threadIdx.x*144) + 104)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*144) + 6)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*144) + 15)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[72]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[73]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[74]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[75]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[76]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[77]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[78]*kernel.shared_1[((threadIdx.x*144) + 24)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[99]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[100]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[101]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[102]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[103]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[104]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[105]*kernel.shared_1[((threadIdx.x*144) + 33)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*144) + 78)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*144) + 87)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[72]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[73]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[74]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[75]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[76]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[77]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[78]*kernel.shared_1[((threadIdx.x*144) + 96)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[99]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[100]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[101]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[102]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[103]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[104]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[105]*kernel.shared_1[((threadIdx.x*144) + 105)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*144) + 7)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*144) + 16)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[73]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[74]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[75]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[76]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[77]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[78]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[79]*kernel.shared_1[((threadIdx.x*144) + 25)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[100]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[101]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[102]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[103]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[104]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[105]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[106]*kernel.shared_1[((threadIdx.x*144) + 34)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*144) + 79)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*144) + 88)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[73]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[74]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[75]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[76]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[77]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[78]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[79]*kernel.shared_1[((threadIdx.x*144) + 97)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[100]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[101]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[102]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[103]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[104]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[105]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[106]*kernel.shared_1[((threadIdx.x*144) + 106)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*144) + 8)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*144) + 17)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[74]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[75]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[76]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[77]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[78]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[79]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[80]*kernel.shared_1[((threadIdx.x*144) + 26)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[101]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[102]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[103]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[104]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[105]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[106]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[107]*kernel.shared_1[((threadIdx.x*144) + 35)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*144) + 80)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*144) + 89)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[74]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[75]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[76]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[77]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[78]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[79]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[80]*kernel.shared_1[((threadIdx.x*144) + 98)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[101]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[102]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[103]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[104]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[105]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[106]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[107]*kernel.shared_1[((threadIdx.x*144) + 107)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[108]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[109]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[110]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[111]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[112]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[113]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[114]*kernel.shared_1[((threadIdx.x*144) + 36)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[135]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[136]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[137]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[138]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[139]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[140]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[141]*kernel.shared_1[((threadIdx.x*144) + 45)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[162]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[163]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[164]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[165]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[166]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[167]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[168]*kernel.shared_1[((threadIdx.x*144) + 54)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[189]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[190]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[191]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[192]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[193]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[194]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[195]*kernel.shared_1[((threadIdx.x*144) + 63)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[108]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[109]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[110]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[111]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[112]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[113]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[114]*kernel.shared_1[((threadIdx.x*144) + 108)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[135]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[136]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[137]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[138]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[139]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[140]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[141]*kernel.shared_1[((threadIdx.x*144) + 117)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[162]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[163]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[164]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[165]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[166]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[167]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[168]*kernel.shared_1[((threadIdx.x*144) + 126)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[189]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[190]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[191]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[192]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[193]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[194]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[195]*kernel.shared_1[((threadIdx.x*144) + 135)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[109]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[110]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[111]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[112]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[113]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[114]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[115]*kernel.shared_1[((threadIdx.x*144) + 37)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[136]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[137]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[138]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[139]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[140]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[141]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[142]*kernel.shared_1[((threadIdx.x*144) + 46)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[163]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[164]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[165]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[166]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[167]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[168]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[169]*kernel.shared_1[((threadIdx.x*144) + 55)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[190]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[191]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[192]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[193]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[194]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[195]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[196]*kernel.shared_1[((threadIdx.x*144) + 64)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[109]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[110]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[111]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[112]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[113]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[114]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[115]*kernel.shared_1[((threadIdx.x*144) + 109)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[136]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[137]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[138]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[139]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[140]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[141]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[142]*kernel.shared_1[((threadIdx.x*144) + 118)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[163]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[164]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[165]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[166]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[167]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[168]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[169]*kernel.shared_1[((threadIdx.x*144) + 127)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[190]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[191]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[192]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[193]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[194]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[195]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[196]*kernel.shared_1[((threadIdx.x*144) + 136)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[110]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[111]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[112]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[113]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[114]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[115]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[116]*kernel.shared_1[((threadIdx.x*144) + 38)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[137]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[138]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[139]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[140]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[141]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[142]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[143]*kernel.shared_1[((threadIdx.x*144) + 47)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[164]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[165]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[166]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[167]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[168]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[169]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[170]*kernel.shared_1[((threadIdx.x*144) + 56)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[191]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[192]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[193]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[194]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[195]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[196]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[197]*kernel.shared_1[((threadIdx.x*144) + 65)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[110]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[111]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[112]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[113]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[114]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[115]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[116]*kernel.shared_1[((threadIdx.x*144) + 110)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[137]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[138]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[139]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[140]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[141]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[142]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[143]*kernel.shared_1[((threadIdx.x*144) + 119)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[164]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[165]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[166]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[167]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[168]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[169]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[170]*kernel.shared_1[((threadIdx.x*144) + 128)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[191]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[192]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[193]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[194]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[195]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[196]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[197]*kernel.shared_1[((threadIdx.x*144) + 137)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[117]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[118]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[119]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[120]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[121]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[122]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[123]*kernel.shared_1[((threadIdx.x*144) + 39)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[144]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[145]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[146]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[147]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[148]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[149]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[150]*kernel.shared_1[((threadIdx.x*144) + 48)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[171]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[172]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[173]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[174]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[175]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[176]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[177]*kernel.shared_1[((threadIdx.x*144) + 57)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[198]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[199]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[200]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[201]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[202]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[203]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[204]*kernel.shared_1[((threadIdx.x*144) + 66)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[117]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[118]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[119]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[120]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[121]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[122]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[123]*kernel.shared_1[((threadIdx.x*144) + 111)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[144]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[145]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[146]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[147]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[148]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[149]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[150]*kernel.shared_1[((threadIdx.x*144) + 120)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[171]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[172]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[173]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[174]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[175]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[176]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[177]*kernel.shared_1[((threadIdx.x*144) + 129)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[198]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[199]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[200]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[201]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[202]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[203]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[204]*kernel.shared_1[((threadIdx.x*144) + 138)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[118]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[119]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[120]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[121]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[122]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[123]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[124]*kernel.shared_1[((threadIdx.x*144) + 40)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[145]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[146]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[147]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[148]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[149]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[150]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[151]*kernel.shared_1[((threadIdx.x*144) + 49)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[172]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[173]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[174]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[175]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[176]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[177]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[178]*kernel.shared_1[((threadIdx.x*144) + 58)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[199]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[200]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[201]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[202]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[203]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[204]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[205]*kernel.shared_1[((threadIdx.x*144) + 67)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[118]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[119]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[120]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[121]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[122]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[123]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[124]*kernel.shared_1[((threadIdx.x*144) + 112)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[145]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[146]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[147]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[148]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[149]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[150]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[151]*kernel.shared_1[((threadIdx.x*144) + 121)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[172]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[173]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[174]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[175]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[176]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[177]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[178]*kernel.shared_1[((threadIdx.x*144) + 130)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[199]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[200]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[201]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[202]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[203]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[204]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[205]*kernel.shared_1[((threadIdx.x*144) + 139)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[119]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[120]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[121]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[122]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[123]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[124]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[125]*kernel.shared_1[((threadIdx.x*144) + 41)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[146]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[147]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[148]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[149]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[150]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[151]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[152]*kernel.shared_1[((threadIdx.x*144) + 50)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[173]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[174]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[175]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[176]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[177]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[178]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[179]*kernel.shared_1[((threadIdx.x*144) + 59)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[200]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[201]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[202]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[203]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[204]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[205]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[206]*kernel.shared_1[((threadIdx.x*144) + 68)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[119]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[120]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[121]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[122]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[123]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[124]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[125]*kernel.shared_1[((threadIdx.x*144) + 113)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[146]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[147]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[148]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[149]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[150]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[151]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[152]*kernel.shared_1[((threadIdx.x*144) + 122)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[173]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[174]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[175]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[176]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[177]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[178]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[179]*kernel.shared_1[((threadIdx.x*144) + 131)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[200]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[201]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[202]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[203]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[204]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[205]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[206]*kernel.shared_1[((threadIdx.x*144) + 140)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[126]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[127]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[128]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[129]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[130]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[131]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[132]*kernel.shared_1[((threadIdx.x*144) + 42)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[153]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[154]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[155]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[156]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[157]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[158]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[159]*kernel.shared_1[((threadIdx.x*144) + 51)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[180]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[181]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[182]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[183]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[184]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[185]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[186]*kernel.shared_1[((threadIdx.x*144) + 60)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[207]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[208]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[209]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[210]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[211]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[212]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[213]*kernel.shared_1[((threadIdx.x*144) + 69)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[126]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[127]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[128]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[129]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[130]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[131]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[132]*kernel.shared_1[((threadIdx.x*144) + 114)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[153]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[154]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[155]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[156]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[157]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[158]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[159]*kernel.shared_1[((threadIdx.x*144) + 123)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[180]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[181]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[182]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[183]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[184]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[185]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[186]*kernel.shared_1[((threadIdx.x*144) + 132)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[207]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[208]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[209]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[210]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[211]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[212]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[213]*kernel.shared_1[((threadIdx.x*144) + 141)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[127]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[128]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[129]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[130]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[131]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[132]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[133]*kernel.shared_1[((threadIdx.x*144) + 43)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[154]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[155]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[156]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[157]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[158]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[159]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[160]*kernel.shared_1[((threadIdx.x*144) + 52)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[181]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[182]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[183]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[184]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[185]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[186]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[187]*kernel.shared_1[((threadIdx.x*144) + 61)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[208]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[209]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[210]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[211]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[212]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[213]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[214]*kernel.shared_1[((threadIdx.x*144) + 70)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[127]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[128]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[129]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[130]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[131]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[132]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[133]*kernel.shared_1[((threadIdx.x*144) + 115)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[154]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[155]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[156]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[157]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[158]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[159]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[160]*kernel.shared_1[((threadIdx.x*144) + 124)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[181]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[182]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[183]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[184]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[185]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[186]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[187]*kernel.shared_1[((threadIdx.x*144) + 133)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[208]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[209]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[210]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[211]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[212]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[213]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[214]*kernel.shared_1[((threadIdx.x*144) + 142)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[128]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[129]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[130]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[131]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[132]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[133]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[134]*kernel.shared_1[((threadIdx.x*144) + 44)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[155]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[156]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[157]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[158]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[159]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[160]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[161]*kernel.shared_1[((threadIdx.x*144) + 53)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[182]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[183]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[184]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[185]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[186]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[187]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[188]*kernel.shared_1[((threadIdx.x*144) + 62)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[209]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[210]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[211]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[212]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[213]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[214]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[215]*kernel.shared_1[((threadIdx.x*144) + 71)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[128]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[129]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[130]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[131]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[132]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[133]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[134]*kernel.shared_1[((threadIdx.x*144) + 116)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[155]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[156]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[157]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[158]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[159]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[160]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[161]*kernel.shared_1[((threadIdx.x*144) + 125)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[182]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[183]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[184]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[185]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[186]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[187]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[188]*kernel.shared_1[((threadIdx.x*144) + 134)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[209]*kernel.shared_1[((threadIdx.x*144) + 143)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[210]*kernel.shared_1[((threadIdx.x*144) + 143)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[211]*kernel.shared_1[((threadIdx.x*144) + 143)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[212]*kernel.shared_1[((threadIdx.x*144) + 143)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[213]*kernel.shared_1[((threadIdx.x*144) + 143)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[214]*kernel.shared_1[((threadIdx.x*144) + 143)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[215]*kernel.shared_1[((threadIdx.x*144) + 143)]))
+            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1: Buffer(pad_temp.shared, float32, [324], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else((((9 <= threadIdx.x_1) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data[(((cse_var_1 + (floordiv(threadIdx.x_1, 9)*7)) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 28)] = @tir.if_then_else(((1 <= floormod((threadIdx.x_1 + 1), 9)) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data[(((cse_var_1 + (floordiv((threadIdx.x_1 + 28), 9)*7)) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 56)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 56), 81)) && (floormod((threadIdx.x_1 + 56), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 56), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 56), 81), 9)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 84)] = @tir.if_then_else((((9 <= floormod((threadIdx.x_1 + 3), 81)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 84), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 3), 81), 9)*7)) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 112)] = @tir.if_then_else(((1 <= floormod((threadIdx.x_1 + 4), 9)) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 112), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 31), 81), 9)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 140)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 59), 81)) && (floormod((threadIdx.x_1 + 59), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 140), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 59), 81), 9)*7)) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 168)] = @tir.if_then_else((((9 <= floormod((threadIdx.x_1 + 6), 81)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 168), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 6), 81), 9)*7)) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 <= floormod((threadIdx.x_1 + 7), 9)) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 196), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 34), 81), 9)*7)) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 62), 81)) && (floormod((threadIdx.x_1 + 62), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 224), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 62), 81), 9)*7)) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 252)] = @tir.if_then_else(((1 <= floormod(threadIdx.x_1, 9)) && (floormod(threadIdx.x_1, 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 252), 81)*49)) + ((floordiv(threadIdx.x_1, 9) + 1)*7)) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            pad_temp.shared_1[(threadIdx.x_1 + 280)] = @tir.if_then_else(((1 <= floormod((threadIdx.x_1 + 1), 9)) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 280), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 37), 81), 9)*7)) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28;
+            if @tir.likely((threadIdx.x_1 < 16), dtype=bool) {
+              pad_temp.shared_1[(threadIdx.x_1 + 308)] = @tir.if_then_else((((threadIdx.x_1 < 7) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[((((cse_var_1 + (floordiv((threadIdx.x_1 + 308), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 65), 81), 9)*7)) + (threadIdx.x_1 + 2)) - 8)], 0f32, dtype=float32)
+            }
+            attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 28 {
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1: Buffer(kernel.shared, float32, [576], [], scope="shared")[(threadIdx.x_2*48)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv((floormod(threadIdx.x_2, 3)*4), 3)*9)) + (floormod(threadIdx.x_2, 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 1)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv((floormod(threadIdx.x_2, 3)*4), 3)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 2)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv((floormod(threadIdx.x_2, 3)*4), 3)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 3)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(((floormod(threadIdx.x_2, 3)*4) + 1), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 4)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(((floormod(threadIdx.x_2, 3)*4) + 1), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 5)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(((floormod(threadIdx.x_2, 3)*4) + 1), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 6)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(((floormod(threadIdx.x_2, 3)*4) + 2), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 7)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(((floormod(threadIdx.x_2, 3)*4) + 2), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 8)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(((floormod(threadIdx.x_2, 3)*4) + 2), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 9)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 1), 4)*9)) + (floormod(threadIdx.x_2, 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 10)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 1), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 11)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 1), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 12)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 4), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 13)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 4), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 14)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 4), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 15)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 5), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 16)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 5), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 17)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 5), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 18)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 2), 4)*9)) + (floormod(threadIdx.x_2, 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 19)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 2), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 20)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 2), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 21)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 7), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 22)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 7), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 23)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 1), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 7), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 24)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 8), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 25)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 8), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 26)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 8), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 27)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 3), 4)*9)) + (floormod(threadIdx.x_2, 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 28)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 3), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 29)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 3), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 30)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 10), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 31)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 10), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 32)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 10), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 33)] = kernel[(((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 11), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3))]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 34)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 11), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 1)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 35)] = kernel[((((((blockIdx.x*73728) + (floordiv(((threadIdx.x_2*4) + 2), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 11), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 2)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 36)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv((floormod(threadIdx.x_2, 3)*4), 3)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 4608)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 37)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv((floormod(threadIdx.x_2, 3)*4), 3)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 4609)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 38)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv((floormod(threadIdx.x_2, 3)*4), 3)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 4610)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 39)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 1), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 4608)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 40)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 1), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 4609)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 41)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 1), 12), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + 4610)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 42)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 2), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 4608)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 43)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 2), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 4609)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 44)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floordiv(floormod(((threadIdx.x_2*4) + 2), 12), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + 4610)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 45)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 1), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 4608)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 46)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 1), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 4609)]
+              }
+              if @tir.likely((threadIdx.x_2 < 12), dtype=bool) {
+                kernel.shared_1[((threadIdx.x_2*48) + 47)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2*4), 3)*4608)) + (rc.outer.outer*36)) + (floormod((floordiv((threadIdx.x_2*4), 3) + 1), 4)*9)) + (floormod(threadIdx.x_2, 3)*3)) + 4610)]
+              }
+            }
+            for (rc.outer.inner: int32, 0, 2) {
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18))]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 3)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 6)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 9)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 90)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 12)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 99)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 15)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 36)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 39)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 42)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 45)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 90)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 48)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 99)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 51)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 72)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 75)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 78)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 81)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 90)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 84)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 99)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 87)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 108)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 111)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 114)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 117)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 90)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 120)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 99)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 123)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 1)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 4)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 7)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 10)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 13)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 16)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 37)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 40)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 43)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 46)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 49)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 52)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 73)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 76)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 79)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 82)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 85)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 88)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 109)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 112)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 115)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 118)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 91)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 121)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 100)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 124)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 2)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 5)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 26)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 8)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 89)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 11)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 98)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 14)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 107)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 17)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 38)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 41)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 26)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 44)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 89)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 47)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 98)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 50)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 107)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 53)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 74)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 77)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 26)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 80)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 89)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 83)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 98)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 86)]))
+              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[16] = (conv2d_nchw_1[16] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[17] = (conv2d_nchw_1[17] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[18] = (conv2d_nchw_1[18] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[19] = (conv2d_nchw_1[19] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[20] = (conv2d_nchw_1[20] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 107)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 89)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 110)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 113)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 21)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 22)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 23)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 24)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 25)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 26)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 116)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 84)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 85)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 86)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 87)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 88)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 89)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 119)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 92)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 93)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 94)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 95)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 96)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 97)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 98)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 122)]))
+              conv2d_nchw_1[21] = (conv2d_nchw_1[21] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 101)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+              conv2d_nchw_1[22] = (conv2d_nchw_1[22] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 102)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+              conv2d_nchw_1[23] = (conv2d_nchw_1[23] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 103)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+              conv2d_nchw_1[24] = (conv2d_nchw_1[24] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 104)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+              conv2d_nchw_1[25] = (conv2d_nchw_1[25] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 105)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+              conv2d_nchw_1[26] = (conv2d_nchw_1[26] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 106)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+              conv2d_nchw_1[27] = (conv2d_nchw_1[27] + (pad_temp.shared_1[(((rc.outer.inner*162) + (floormod(threadIdx.x, 7)*9)) + 107)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*144) + (rc.outer.inner*18)) + 125)]))
+            }
           }
         }
-        for (i1.inner: int32, 0, 2) {
-          compute[((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7))] = max((conv2d_nchw_1[i1.inner] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          compute[(((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + 1)] = max((conv2d_nchw_1[(i1.inner + 2)] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          compute[(((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + 2)] = max((conv2d_nchw_1[(i1.inner + 4)] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          compute[(((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + 3)] = max((conv2d_nchw_1[(i1.inner + 6)] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          compute[(((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + 4)] = max((conv2d_nchw_1[(i1.inner + 8)] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          compute[(((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + 5)] = max((conv2d_nchw_1[(i1.inner + 10)] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          compute[(((((floordiv(blockIdx.x, 7)*784) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + 6)] = max((conv2d_nchw_1[(i1.inner + 12)] + bias[(((floordiv(blockIdx.x, 7)*16) + (threadIdx.x*2)) + i1.inner)]), 0f32)
+        for (i1.inner: int32, 0, 4) {
+          for (i3.inner: int32, 0, 7) {
+            compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*4)) + i1.inner)]), 0f32)
+          }
         }
       }
     }
@@ -1675,7 +1013,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.427 ms
+    Execution time of this operator: 0.378 ms
 
 
 
@@ -1724,36 +1062,36 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=4)
     conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
-    conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+    conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
     conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
-    conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=7)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=4)
+    conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
     conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
-    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
-    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
+    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
     conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=4)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=4)
     compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
     compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
-    compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=7)
+    compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
     kernel_shared = s.cache_read(kernel, "shared", [conv2d_nchw])
@@ -1770,16 +1108,16 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused = s[compute].fuse(compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i)
     s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread_axis("threadIdx.x"))
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=48)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=8)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=28)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=8)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=28)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 1024)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -1797,1215 +1135,710 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(8) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[14];
-      __shared__ float pad_temp_shared[216];
-      __shared__ float kernel_shared[1152];
+    extern "C" __global__ void __launch_bounds__(28) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[28];
+      __shared__ float pad_temp_shared[324];
+      __shared__ float kernel_shared[576];
       conv2d_nchw[0] = 0.000000e+00f;
-      conv2d_nchw[2] = 0.000000e+00f;
-      conv2d_nchw[4] = 0.000000e+00f;
-      conv2d_nchw[6] = 0.000000e+00f;
-      conv2d_nchw[8] = 0.000000e+00f;
-      conv2d_nchw[10] = 0.000000e+00f;
-      conv2d_nchw[12] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
+      conv2d_nchw[2] = 0.000000e+00f;
       conv2d_nchw[3] = 0.000000e+00f;
+      conv2d_nchw[4] = 0.000000e+00f;
       conv2d_nchw[5] = 0.000000e+00f;
+      conv2d_nchw[6] = 0.000000e+00f;
       conv2d_nchw[7] = 0.000000e+00f;
+      conv2d_nchw[8] = 0.000000e+00f;
       conv2d_nchw[9] = 0.000000e+00f;
+      conv2d_nchw[10] = 0.000000e+00f;
       conv2d_nchw[11] = 0.000000e+00f;
+      conv2d_nchw[12] = 0.000000e+00f;
       conv2d_nchw[13] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
+      conv2d_nchw[14] = 0.000000e+00f;
+      conv2d_nchw[15] = 0.000000e+00f;
+      conv2d_nchw[16] = 0.000000e+00f;
+      conv2d_nchw[17] = 0.000000e+00f;
+      conv2d_nchw[18] = 0.000000e+00f;
+      conv2d_nchw[19] = 0.000000e+00f;
+      conv2d_nchw[20] = 0.000000e+00f;
+      conv2d_nchw[21] = 0.000000e+00f;
+      conv2d_nchw[22] = 0.000000e+00f;
+      conv2d_nchw[23] = 0.000000e+00f;
+      conv2d_nchw[24] = 0.000000e+00f;
+      conv2d_nchw[25] = 0.000000e+00f;
+      conv2d_nchw[26] = 0.000000e+00f;
+      conv2d_nchw[27] = 0.000000e+00f;
+      for (int rc_outer_outer = 0; rc_outer_outer < 128; ++rc_outer_outer) {
         __syncthreads();
-        pad_temp_shared[((int)threadIdx.x)] = (((1 <= (((int)blockIdx.x) % 7)) && (1 <= ((int)threadIdx.x))) ? data[((((rc_outer_outer * 392) + ((((int)blockIdx.x) % 7) * 7)) + ((int)threadIdx.x)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 8)] = ((((1 <= (((((int)threadIdx.x) + 8) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 8) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 16)] = (((((((((int)threadIdx.x) + 16) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 16) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 24)] = (((((1 <= ((((((int)threadIdx.x) + 24) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 24) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 24) / 27) * 49)) + ((((((int)threadIdx.x) + 24) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] :  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 32)] = ((((1 <= (((((int)threadIdx.x) + 5) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 32) / 27) * 49)) + (((((int)threadIdx.x) + 5) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 40)] = (((((((((int)threadIdx.x) + 13) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 40) / 27) * 49)) + (((((int)threadIdx.x) + 13) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 48)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 21) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 48) / 27) * 49)) + ((((((int)threadIdx.x) + 21) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] :  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 56)] = ((((1 <= (((((int)threadIdx.x) + 2) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 56) / 27) * 49)) + (((((int)threadIdx.x) + 2) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 64)] = ((((int)threadIdx.x) < 7) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 64) / 27) * 49)) + (((((int)threadIdx.x) + 10) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((int)threadIdx.x)) - 7)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 72)] = ((((((int)blockIdx.x) % 7) < 6) && (1 <= ((int)threadIdx.x))) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 72) / 27) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + ((int)threadIdx.x)) + 6)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 80)] = (((((1 <= ((((((int)threadIdx.x) + 26) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 26) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 80) / 27) * 49)) + ((((((int)threadIdx.x) + 26) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] :  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 88)] = ((((1 <= (((((int)threadIdx.x) + 7) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 88) / 27) * 49)) + (((((int)threadIdx.x) + 7) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 96)] = (((((((((int)threadIdx.x) + 15) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 96) / 27) * 49)) + (((((int)threadIdx.x) + 15) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 104)] = (((((1 <= ((((((int)threadIdx.x) + 23) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 23) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 104) / 27) * 49)) + ((((((int)threadIdx.x) + 23) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)]  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 112)] = ((((1 <= (((((int)threadIdx.x) + 4) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 112) / 27) * 49)) + (((((int)threadIdx.x) + 4) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 120)] = (((((((((int)threadIdx.x) + 12) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 120) / 27) * 49)) + (((((int)threadIdx.x) + 12) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 128)] = (((((1 <= ((((((int)threadIdx.x) + 20) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 20) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 128) / 27) * 49)) + ((((((int)threadIdx.x) + 20) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)]  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 136)] = (((1 <= (((int)blockIdx.x) % 7)) && (((int)threadIdx.x) < 7)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 136) / 27) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + ((int)threadIdx.x)) - 7)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 144)] = ((1 <= ((int)threadIdx.x)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 144) / 27) * 49)) + ((((int)blockIdx.x) % 7) * 7)) + ((int)threadIdx.x)) - 1)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 152)] = (((((((((int)threadIdx.x) + 17) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 152) / 27) * 49)) + (((((int)threadIdx.x) + 17) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 160)] = (((((1 <= ((((((int)threadIdx.x) + 25) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 25) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 160) / 27) * 49)) + ((((((int)threadIdx.x) + 25) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)]  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 168)] = ((((1 <= (((((int)threadIdx.x) + 6) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 168) / 27) * 49)) + (((((int)threadIdx.x) + 6) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 176)] = (((((((((int)threadIdx.x) + 14) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 176) / 27) * 49)) + (((((int)threadIdx.x) + 14) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 184)] = (((((1 <= ((((((int)threadIdx.x) + 22) % 27) / 9) + (((int)blockIdx.x) % 7))) && (((((((int)threadIdx.x) + 22) % 27) / 9) + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 184) / 27) * 49)) + ((((((int)threadIdx.x) + 22) % 27) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)]  [...]
-        pad_temp_shared[(((int)threadIdx.x) + 192)] = ((((1 <= (((((int)threadIdx.x) + 3) / 9) + (((int)blockIdx.x) % 7))) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 192) / 27) * 49)) + (((((int)threadIdx.x) + 3) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 200)] = (((((((((int)threadIdx.x) + 11) / 9) + (((int)blockIdx.x) % 7)) < 8) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 200) / 27) * 49)) + (((((int)threadIdx.x) + 11) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 208)] = ((((((((int)threadIdx.x) + 19) / 9) + (((int)blockIdx.x) % 7)) < 8) && (((int)threadIdx.x) < 7)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 208) / 27) * 49)) + (((((int)threadIdx.x) + 19) / 9) * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((int)threadIdx.x)) - 7)] : 0.000000e+00f);
-        kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x))];
-        kernel_shared[(((int)threadIdx.x) + 8)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 16)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 24)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 24)];
-        kernel_shared[(((int)threadIdx.x) + 32)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 40)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 48)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 48)];
-        kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 72)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 80)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 88)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 96)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 4632)];
-        kernel_shared[(((int)threadIdx.x) + 104)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 120)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 4656)];
-        kernel_shared[(((int)threadIdx.x) + 128)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 136)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 4608)];
-        kernel_shared[(((int)threadIdx.x) + 144)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 152)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 160)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 168)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 9240)];
-        kernel_shared[(((int)threadIdx.x) + 176)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 184)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 192)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 9264)];
-        kernel_shared[(((int)threadIdx.x) + 200)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 208)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 9216)];
-        kernel_shared[(((int)threadIdx.x) + 216)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 232)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 240)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 13848)];
-        kernel_shared[(((int)threadIdx.x) + 248)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 256)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 264)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 13872)];
-        kernel_shared[(((int)threadIdx.x) + 272)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 13824)];
-        kernel_shared[(((int)threadIdx.x) + 288)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 296)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 304)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 312)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 18456)];
-        kernel_shared[(((int)threadIdx.x) + 320)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 328)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 336)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 18480)];
-        kernel_shared[(((int)threadIdx.x) + 344)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 352)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 18432)];
-        kernel_shared[(((int)threadIdx.x) + 360)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 368)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 376)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 384)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 23064)];
-        kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 400)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 408)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 23088)];
-        kernel_shared[(((int)threadIdx.x) + 416)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 424)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 23040)];
-        kernel_shared[(((int)threadIdx.x) + 432)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 440)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 456)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 27672)];
-        kernel_shared[(((int)threadIdx.x) + 464)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 472)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 480)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 27696)];
-        kernel_shared[(((int)threadIdx.x) + 488)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 496)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 27648)];
-        kernel_shared[(((int)threadIdx.x) + 504)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 512)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 520)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 528)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 32280)];
-        kernel_shared[(((int)threadIdx.x) + 536)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 544)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 552)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 32304)];
-        kernel_shared[(((int)threadIdx.x) + 560)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 568)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 32256)];
-        kernel_shared[(((int)threadIdx.x) + 576)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 584)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 592)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 600)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 36888)];
-        kernel_shared[(((int)threadIdx.x) + 608)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 616)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 624)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 36912)];
-        kernel_shared[(((int)threadIdx.x) + 632)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 640)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 36864)];
-        kernel_shared[(((int)threadIdx.x) + 648)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 656)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 664)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 672)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 41496)];
-        kernel_shared[(((int)threadIdx.x) + 680)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 688)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 696)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 41520)];
-        kernel_shared[(((int)threadIdx.x) + 704)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 712)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 41472)];
-        kernel_shared[(((int)threadIdx.x) + 720)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 728)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 736)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 744)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 46104)];
-        kernel_shared[(((int)threadIdx.x) + 752)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 760)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 768)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 46128)];
-        kernel_shared[(((int)threadIdx.x) + 776)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 784)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 46080)];
-        kernel_shared[(((int)threadIdx.x) + 792)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 800)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 808)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 816)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 50712)];
-        kernel_shared[(((int)threadIdx.x) + 824)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 832)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 840)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 50736)];
-        kernel_shared[(((int)threadIdx.x) + 848)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 856)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 50688)];
-        kernel_shared[(((int)threadIdx.x) + 864)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 872)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 880)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 888)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 55320)];
-        kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 904)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 912)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 55344)];
-        kernel_shared[(((int)threadIdx.x) + 920)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 928)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 55296)];
-        kernel_shared[(((int)threadIdx.x) + 936)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 944)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 952)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 960)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 59928)];
-        kernel_shared[(((int)threadIdx.x) + 968)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 976)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 984)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 59952)];
-        kernel_shared[(((int)threadIdx.x) + 992)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 1000)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 59904)];
-        kernel_shared[(((int)threadIdx.x) + 1008)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1016)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1032)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 64536)];
-        kernel_shared[(((int)threadIdx.x) + 1040)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1048)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1056)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 64560)];
-        kernel_shared[(((int)threadIdx.x) + 1064)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1072)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 64512)];
-        kernel_shared[(((int)threadIdx.x) + 1080)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 69120)];
-        kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 8) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 69120)];
-        kernel_shared[(((int)threadIdx.x) + 1096)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 16) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 69120)];
-        kernel_shared[(((int)threadIdx.x) + 1104)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 69144)];
-        kernel_shared[(((int)threadIdx.x) + 1112)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 32) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 69120)];
-        kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 69120)];
-        kernel_shared[(((int)threadIdx.x) + 1128)] = kernel[(((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + ((int)threadIdx.x)) + 69168)];
-        kernel_shared[(((int)threadIdx.x) + 1136)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 56) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3)) + 69120)];
-        kernel_shared[(((int)threadIdx.x) + 1144)] = kernel[((((((((int)blockIdx.x) / 7) * 73728) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 64) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3)) + 69120)];
+        pad_temp_shared[((int)threadIdx.x)] = ((((9 <= ((int)threadIdx.x)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[((((rc_outer_outer * 196) + ((((int)threadIdx.x) / 9) * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 28)] = (((1 <= ((((int)threadIdx.x) + 1) % 9)) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 28) / 9) * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 56)] = (((((9 <= ((((int)threadIdx.x) + 56) % 81)) && (((((int)threadIdx.x) + 56) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 56) / 81) * 49)) + ((((((int)threadIdx.x) + 56) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 84)] = ((((6 <= ((int)threadIdx.x)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 84) / 81) * 49)) + (((((int)threadIdx.x) + 3) / 9) * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 112)] = (((1 <= ((((int)threadIdx.x) + 4) % 9)) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 112) / 81) * 49)) + (((((int)threadIdx.x) + 31) / 9) * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 140)] = (((((9 <= ((((int)threadIdx.x) + 59) % 81)) && (((((int)threadIdx.x) + 59) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 140) / 81) * 49)) + ((((((int)threadIdx.x) + 59) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 168)] = ((((3 <= ((int)threadIdx.x)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 168) / 81) * 49)) + (((((int)threadIdx.x) + 6) / 9) * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 <= ((((int)threadIdx.x) + 7) % 9)) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 196) / 81) * 49)) + (((((int)threadIdx.x) + 34) / 9) * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((9 <= ((((int)threadIdx.x) + 62) % 81)) && (((((int)threadIdx.x) + 62) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 224) / 81) * 49)) + ((((((int)threadIdx.x) + 62) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 252)] = (((1 <= (((int)threadIdx.x) % 9)) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 252) / 81) * 49)) + ((((int)threadIdx.x) / 9) * 7)) + (((int)threadIdx.x) % 9)) - 1)] : 0.000000e+00f);
+        pad_temp_shared[(((int)threadIdx.x) + 280)] = (((1 <= ((((int)threadIdx.x) + 1) % 9)) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 280) / 81) * 49)) + (((((int)threadIdx.x) + 37) / 9) * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
+        if (((int)threadIdx.x) < 16) {
+          pad_temp_shared[(((int)threadIdx.x) + 308)] = ((((((int)threadIdx.x) < 7) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 196) + (((((int)threadIdx.x) + 308) / 81) * 49)) + (((((int)threadIdx.x) + 65) / 9) * 7)) + ((int)threadIdx.x)) - 6)] : 0.000000e+00f);
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[(((int)threadIdx.x) * 48)] = kernel[((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + ((((int)threadIdx.x) % 3) * 12))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 1)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + ((((int)threadIdx.x) % 3) * 12)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 2)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + ((((int)threadIdx.x) % 3) * 12)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 3)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 1) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 4)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 1) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 5)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 1) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 6)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 2) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 7)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 2) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 8)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 2) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 9)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 1) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 10)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 1) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 11)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 1) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 12)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 4) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 13)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 4) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 14)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 4) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 15)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 5) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 16)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 5) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 17)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 5) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 18)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 2) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 19)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 2) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 20)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 2) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 21)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 7) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 22)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 7) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 23)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 1) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 7) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 24)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 8) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 25)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 8) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 26)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 8) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 27)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 3) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 28)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 3) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 29)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 3) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 30)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 10) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 31)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 10) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 32)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 10) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 33)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 11) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3))];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 34)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 11) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 1)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 35)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((((int)threadIdx.x) * 4) + 2) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) + 11) % 12) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 2)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 36)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + ((((int)threadIdx.x) % 3) * 12)) + 4608)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 37)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + ((((int)threadIdx.x) % 3) * 12)) + 4609)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 38)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + ((((int)threadIdx.x) % 3) * 12)) + 4610)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 39)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 1) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 4608)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 40)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 1) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 4609)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 41)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 1) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + 4610)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 42)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 2) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 4608)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 43)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 2) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 4609)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 44)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) % 3) * 4) + 2) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + 4610)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 45)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 1) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 4608)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 46)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 1) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 4609)];
+        }
+        if (((int)threadIdx.x) < 12) {
+          kernel_shared[((((int)threadIdx.x) * 48) + 47)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) * 4) / 3) * 4608)) + (rc_outer_outer * 36)) + (((((((int)threadIdx.x) * 4) / 3) + 1) & 3) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + 4610)];
+        }
         __syncthreads();
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 144)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 144) + 9)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 144) + 18)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[81] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[82] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[83] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[84] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[85] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[86] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[87] * kernel_shared[((((int)threadIdx.x) * 144) + 27)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 144) + 72)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 144) + 81)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 144) + 90)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[81] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[82] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[83] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[84] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[85] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[86] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[87] * kernel_shared[((((int)threadIdx.x) * 144) + 99)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 144) + 1)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 144) + 10)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 144) + 19)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[82] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[83] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[84] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[85] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[86] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[87] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[88] * kernel_shared[((((int)threadIdx.x) * 144) + 28)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 144) + 73)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 144) + 82)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 144) + 91)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[82] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[83] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[84] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[85] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[86] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[87] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[88] * kernel_shared[((((int)threadIdx.x) * 144) + 100)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 144) + 2)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 144) + 11)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 144) + 20)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[83] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[84] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[85] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[86] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[87] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[88] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[89] * kernel_shared[((((int)threadIdx.x) * 144) + 29)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 144) + 74)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 144) + 83)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 144) + 92)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[83] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[84] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[85] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[86] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[87] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[88] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[89] * kernel_shared[((((int)threadIdx.x) * 144) + 101)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 144) + 3)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 144) + 12)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 144) + 21)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[90] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[91] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[92] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[93] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[94] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[95] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[96] * kernel_shared[((((int)threadIdx.x) * 144) + 30)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 144) + 75)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 144) + 84)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 144) + 93)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[90] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[91] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[92] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[93] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[94] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[95] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[96] * kernel_shared[((((int)threadIdx.x) * 144) + 102)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 144) + 4)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 144) + 13)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 144) + 22)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[91] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[92] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[93] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[94] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[95] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[96] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[97] * kernel_shared[((((int)threadIdx.x) * 144) + 31)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 144) + 76)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 144) + 85)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 144) + 94)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[91] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[92] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[93] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[94] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[95] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[96] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[97] * kernel_shared[((((int)threadIdx.x) * 144) + 103)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 144) + 5)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 144) + 14)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 144) + 23)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[92] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[93] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[94] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[95] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[96] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[97] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[98] * kernel_shared[((((int)threadIdx.x) * 144) + 32)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 144) + 77)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 144) + 86)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 144) + 95)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[92] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[93] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[94] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[95] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[96] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[97] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[98] * kernel_shared[((((int)threadIdx.x) * 144) + 104)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 144) + 6)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 144) + 15)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[72] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[73] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[74] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[75] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[76] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[77] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[78] * kernel_shared[((((int)threadIdx.x) * 144) + 24)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[99] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[100] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[101] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[102] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[103] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[104] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[105] * kernel_shared[((((int)threadIdx.x) * 144) + 33)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 144) + 78)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 144) + 87)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[72] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[73] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[74] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[75] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[76] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[77] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[78] * kernel_shared[((((int)threadIdx.x) * 144) + 96)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[99] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[100] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[101] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[102] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[103] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[104] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[105] * kernel_shared[((((int)threadIdx.x) * 144) + 105)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 144) + 7)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 144) + 16)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[73] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[74] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[75] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[76] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[77] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[78] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[79] * kernel_shared[((((int)threadIdx.x) * 144) + 25)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[100] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[101] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[102] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[103] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[104] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[105] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[106] * kernel_shared[((((int)threadIdx.x) * 144) + 34)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 144) + 79)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 144) + 88)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[73] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[74] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[75] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[76] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[77] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[78] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[79] * kernel_shared[((((int)threadIdx.x) * 144) + 97)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[100] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[101] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[102] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[103] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[104] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[105] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[106] * kernel_shared[((((int)threadIdx.x) * 144) + 106)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 144) + 8)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 144) + 17)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[74] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[75] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[76] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[77] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[78] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[79] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[80] * kernel_shared[((((int)threadIdx.x) * 144) + 26)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[101] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[102] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[103] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[104] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[105] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[106] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[107] * kernel_shared[((((int)threadIdx.x) * 144) + 35)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 144) + 80)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 144) + 89)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[74] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[75] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[76] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[77] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[78] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[79] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[80] * kernel_shared[((((int)threadIdx.x) * 144) + 98)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[101] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[102] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[103] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[104] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[105] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[106] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[107] * kernel_shared[((((int)threadIdx.x) * 144) + 107)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[108] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[109] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[110] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[111] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[112] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[113] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[114] * kernel_shared[((((int)threadIdx.x) * 144) + 36)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[135] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[136] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[137] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[138] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[139] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[140] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[141] * kernel_shared[((((int)threadIdx.x) * 144) + 45)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[162] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[163] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[164] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[165] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[166] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[167] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[168] * kernel_shared[((((int)threadIdx.x) * 144) + 54)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[189] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[190] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[191] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[192] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[193] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[194] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[195] * kernel_shared[((((int)threadIdx.x) * 144) + 63)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[108] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[109] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[110] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[111] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[112] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[113] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[114] * kernel_shared[((((int)threadIdx.x) * 144) + 108)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[135] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[136] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[137] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[138] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[139] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[140] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[141] * kernel_shared[((((int)threadIdx.x) * 144) + 117)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[162] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[163] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[164] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[165] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[166] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[167] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[168] * kernel_shared[((((int)threadIdx.x) * 144) + 126)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[189] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[190] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[191] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[192] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[193] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[194] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[195] * kernel_shared[((((int)threadIdx.x) * 144) + 135)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[109] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[110] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[111] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[112] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[113] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[114] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[115] * kernel_shared[((((int)threadIdx.x) * 144) + 37)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[136] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[137] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[138] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[139] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[140] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[141] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[142] * kernel_shared[((((int)threadIdx.x) * 144) + 46)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[163] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[164] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[165] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[166] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[167] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[168] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[169] * kernel_shared[((((int)threadIdx.x) * 144) + 55)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[190] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[191] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[192] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[193] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[194] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[195] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[196] * kernel_shared[((((int)threadIdx.x) * 144) + 64)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[109] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[110] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[111] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[112] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[113] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[114] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[115] * kernel_shared[((((int)threadIdx.x) * 144) + 109)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[136] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[137] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[138] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[139] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[140] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[141] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[142] * kernel_shared[((((int)threadIdx.x) * 144) + 118)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[163] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[164] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[165] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[166] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[167] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[168] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[169] * kernel_shared[((((int)threadIdx.x) * 144) + 127)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[190] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[191] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[192] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[193] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[194] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[195] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[196] * kernel_shared[((((int)threadIdx.x) * 144) + 136)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[110] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[111] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[112] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[113] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[114] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[115] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[116] * kernel_shared[((((int)threadIdx.x) * 144) + 38)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[137] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[138] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[139] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[140] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[141] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[142] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[143] * kernel_shared[((((int)threadIdx.x) * 144) + 47)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[164] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[165] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[166] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[167] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[168] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[169] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[170] * kernel_shared[((((int)threadIdx.x) * 144) + 56)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[191] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[192] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[193] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[194] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[195] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[196] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[197] * kernel_shared[((((int)threadIdx.x) * 144) + 65)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[110] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[111] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[112] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[113] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[114] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[115] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[116] * kernel_shared[((((int)threadIdx.x) * 144) + 110)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[137] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[138] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[139] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[140] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[141] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[142] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[143] * kernel_shared[((((int)threadIdx.x) * 144) + 119)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[164] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[165] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[166] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[167] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[168] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[169] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[170] * kernel_shared[((((int)threadIdx.x) * 144) + 128)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[191] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[192] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[193] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[194] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[195] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[196] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[197] * kernel_shared[((((int)threadIdx.x) * 144) + 137)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[117] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[118] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[119] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[120] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[121] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[122] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[123] * kernel_shared[((((int)threadIdx.x) * 144) + 39)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[144] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[145] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[146] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[147] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[148] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[149] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[150] * kernel_shared[((((int)threadIdx.x) * 144) + 48)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[171] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[172] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[173] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[174] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[175] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[176] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[177] * kernel_shared[((((int)threadIdx.x) * 144) + 57)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[198] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[199] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[200] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[201] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[202] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[203] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[204] * kernel_shared[((((int)threadIdx.x) * 144) + 66)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[117] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[118] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[119] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[120] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[121] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[122] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[123] * kernel_shared[((((int)threadIdx.x) * 144) + 111)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[144] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[145] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[146] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[147] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[148] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[149] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[150] * kernel_shared[((((int)threadIdx.x) * 144) + 120)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[171] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[172] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[173] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[174] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[175] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[176] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[177] * kernel_shared[((((int)threadIdx.x) * 144) + 129)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[198] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[199] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[200] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[201] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[202] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[203] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[204] * kernel_shared[((((int)threadIdx.x) * 144) + 138)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[118] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[119] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[120] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[121] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[122] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[123] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[124] * kernel_shared[((((int)threadIdx.x) * 144) + 40)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[145] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[146] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[147] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[148] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[149] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[150] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[151] * kernel_shared[((((int)threadIdx.x) * 144) + 49)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[172] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[173] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[174] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[175] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[176] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[177] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[178] * kernel_shared[((((int)threadIdx.x) * 144) + 58)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[199] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[200] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[201] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[202] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[203] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[204] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[205] * kernel_shared[((((int)threadIdx.x) * 144) + 67)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[118] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[119] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[120] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[121] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[122] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[123] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[124] * kernel_shared[((((int)threadIdx.x) * 144) + 112)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[145] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[146] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[147] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[148] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[149] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[150] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[151] * kernel_shared[((((int)threadIdx.x) * 144) + 121)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[172] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[173] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[174] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[175] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[176] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[177] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[178] * kernel_shared[((((int)threadIdx.x) * 144) + 130)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[199] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[200] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[201] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[202] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[203] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[204] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[205] * kernel_shared[((((int)threadIdx.x) * 144) + 139)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[119] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[120] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[121] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[122] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[123] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[124] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[125] * kernel_shared[((((int)threadIdx.x) * 144) + 41)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[146] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[147] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[148] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[149] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[150] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[151] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[152] * kernel_shared[((((int)threadIdx.x) * 144) + 50)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[173] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[174] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[175] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[176] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[177] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[178] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[179] * kernel_shared[((((int)threadIdx.x) * 144) + 59)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[200] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[201] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[202] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[203] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[204] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[205] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[206] * kernel_shared[((((int)threadIdx.x) * 144) + 68)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[119] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[120] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[121] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[122] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[123] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[124] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[125] * kernel_shared[((((int)threadIdx.x) * 144) + 113)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[146] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[147] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[148] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[149] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[150] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[151] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[152] * kernel_shared[((((int)threadIdx.x) * 144) + 122)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[173] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[174] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[175] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[176] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[177] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[178] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[179] * kernel_shared[((((int)threadIdx.x) * 144) + 131)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[200] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[201] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[202] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[203] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[204] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[205] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[206] * kernel_shared[((((int)threadIdx.x) * 144) + 140)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[126] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[127] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[128] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[129] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[130] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[131] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[132] * kernel_shared[((((int)threadIdx.x) * 144) + 42)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[153] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[154] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[155] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[156] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[157] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[158] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[159] * kernel_shared[((((int)threadIdx.x) * 144) + 51)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[180] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[181] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[182] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[183] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[184] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[185] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[186] * kernel_shared[((((int)threadIdx.x) * 144) + 60)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[207] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[208] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[209] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[210] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[211] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[212] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[213] * kernel_shared[((((int)threadIdx.x) * 144) + 69)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[126] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[127] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[128] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[129] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[130] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[131] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[132] * kernel_shared[((((int)threadIdx.x) * 144) + 114)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[153] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[154] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[155] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[156] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[157] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[158] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[159] * kernel_shared[((((int)threadIdx.x) * 144) + 123)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[180] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[181] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[182] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[183] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[184] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[185] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[186] * kernel_shared[((((int)threadIdx.x) * 144) + 132)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[207] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[208] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[209] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[210] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[211] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[212] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[213] * kernel_shared[((((int)threadIdx.x) * 144) + 141)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[127] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[128] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[129] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[130] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[131] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[132] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[133] * kernel_shared[((((int)threadIdx.x) * 144) + 43)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[154] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[155] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[156] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[157] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[158] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[159] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[160] * kernel_shared[((((int)threadIdx.x) * 144) + 52)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[181] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[182] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[183] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[184] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[185] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[186] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[187] * kernel_shared[((((int)threadIdx.x) * 144) + 61)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[208] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[209] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[210] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[211] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[212] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[213] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[214] * kernel_shared[((((int)threadIdx.x) * 144) + 70)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[127] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[128] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[129] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[130] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[131] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[132] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[133] * kernel_shared[((((int)threadIdx.x) * 144) + 115)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[154] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[155] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[156] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[157] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[158] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[159] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[160] * kernel_shared[((((int)threadIdx.x) * 144) + 124)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[181] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[182] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[183] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[184] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[185] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[186] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[187] * kernel_shared[((((int)threadIdx.x) * 144) + 133)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[208] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[209] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[210] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[211] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[212] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[213] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[214] * kernel_shared[((((int)threadIdx.x) * 144) + 142)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[128] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[129] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[130] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[131] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[132] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[133] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[134] * kernel_shared[((((int)threadIdx.x) * 144) + 44)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[155] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[156] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[157] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[158] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[159] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[160] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[161] * kernel_shared[((((int)threadIdx.x) * 144) + 53)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[182] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[183] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[184] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[185] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[186] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[187] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[188] * kernel_shared[((((int)threadIdx.x) * 144) + 62)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[209] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[210] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[211] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[212] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[213] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[214] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[215] * kernel_shared[((((int)threadIdx.x) * 144) + 71)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[128] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[129] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[130] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[131] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[132] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[133] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[134] * kernel_shared[((((int)threadIdx.x) * 144) + 116)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[155] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[156] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[157] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[158] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[159] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[160] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[161] * kernel_shared[((((int)threadIdx.x) * 144) + 125)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[182] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[183] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[184] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[185] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[186] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[187] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[188] * kernel_shared[((((int)threadIdx.x) * 144) + 134)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[209] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[210] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[211] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[212] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[213] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[214] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[215] * kernel_shared[((((int)threadIdx.x) * 144) + 143)]));
+        for (int rc_outer_inner = 0; rc_outer_inner < 2; ++rc_outer_inner) {
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[(((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18))]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 3)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 6)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 9)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 90)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 12)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 99)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 15)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 36)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 39)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 42)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 45)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 90)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 48)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 99)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 51)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 72)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 75)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 78)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 81)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 90)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 84)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 99)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 87)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 108)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 111)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 114)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 117)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 90)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 120)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 99)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 123)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 1)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 4)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 7)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 10)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 13)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 16)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 37)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 40)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 43)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 46)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 49)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 52)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 73)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 76)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 79)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 82)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 85)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 88)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 109)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 112)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 115)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 118)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 91)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 121)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 100)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 124)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 2)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 5)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 26)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 8)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 89)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 11)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 98)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 14)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 107)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 17)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 38)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 41)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 26)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 44)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 89)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 47)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 98)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 50)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 107)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 53)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 74)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 77)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 26)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 80)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 89)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 83)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 98)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 86)]));
+          conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[16] = (conv2d_nchw[16] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[17] = (conv2d_nchw[17] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[18] = (conv2d_nchw[18] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[19] = (conv2d_nchw[19] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[20] = (conv2d_nchw[20] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 107)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 89)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 110)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 113)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 21)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 22)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 23)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 24)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 25)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 26)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 116)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 84)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 85)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 86)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 87)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 88)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 89)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 119)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 92)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 93)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 94)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 95)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 96)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 97)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 98)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 122)]));
+          conv2d_nchw[21] = (conv2d_nchw[21] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 101)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+          conv2d_nchw[22] = (conv2d_nchw[22] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 102)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+          conv2d_nchw[23] = (conv2d_nchw[23] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 103)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+          conv2d_nchw[24] = (conv2d_nchw[24] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 104)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+          conv2d_nchw[25] = (conv2d_nchw[25] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 105)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+          conv2d_nchw[26] = (conv2d_nchw[26] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 106)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+          conv2d_nchw[27] = (conv2d_nchw[27] + (pad_temp_shared[(((rc_outer_inner * 162) + ((((int)threadIdx.x) % 7) * 9)) + 107)] * kernel_shared[((((((int)threadIdx.x) / 7) * 144) + (rc_outer_inner * 18)) + 125)]));
+        }
       }
-      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
-        compute[(((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7))] = max((conv2d_nchw[i1_inner] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        compute[((((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + 1)] = max((conv2d_nchw[(i1_inner + 2)] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        compute[((((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + 2)] = max((conv2d_nchw[(i1_inner + 4)] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        compute[((((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + 3)] = max((conv2d_nchw[(i1_inner + 6)] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        compute[((((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + 4)] = max((conv2d_nchw[(i1_inner + 8)] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        compute[((((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + 5)] = max((conv2d_nchw[(i1_inner + 10)] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        compute[((((((((int)blockIdx.x) / 7) * 784) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + 6)] = max((conv2d_nchw[(i1_inner + 12)] + bias[((((((int)blockIdx.x) / 7) * 16) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
+      for (int i1_inner = 0; i1_inner < 4; ++i1_inner) {
+        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
+          compute[(((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 196)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 16) + ((((int)threadIdx.x) / 7) * 4)) + i1_inner)]), 0.000000e+00f);
+        }
       }
     }
 
@@ -3067,7 +1900,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  40.053 seconds)
+   **Total running time of the script:** ( 2 minutes  39.675 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index c9d7eae86..6cb1e6300 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -646,7 +646,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      10.1090      10.1229      10.1553      10.0487       0.0446   
+       9.6697       9.6841       9.7189       9.6062       0.0471   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index ddf4d5ca2..d6fa682d5 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -665,7 +665,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      769.2162     768.9715     769.9746     768.7023      0.5475   
+      748.9835     749.1248     749.2464     748.5791      0.2902   
                
 
 
@@ -693,7 +693,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  19.621 seconds)
+   **Total running time of the script:** ( 1 minutes  21.051 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index 76751acf5..99e6083dc 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -396,77 +396,29 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {placeholder_9: placeholder_15: Buffer(placeholder_14, float32, [128, 512], []), placeholder_8: placeholder_16: Buffer(placeholder_13, int32, [33], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_5: placeholder_17: Buffer(placeholder_10, float32, [128, 256], []), placeholder_6: placeholder_18: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_7: placeholder_19: Buffer(placeholder_12, int32, [4916], [])} {
+      preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_9: placeholder_16: Buffer(placeholder_14, float32, [128, 512], []), placeholder_6: placeholder_17: Buffer(placeholder_11, float32, [4916, 16, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_7: placeholder_18: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_19: Buffer(placeholder_13, int32, [33], [])} {
       for (i0.outer.i1.outer.fused: int32, 0, 128) "parallel" {
         allocate(compute_4: Pointer(global float32), float32, [512]), storage_scope = global {
           for (nb_j.inner: int32, 0, 2) {
             for (i.inner.init: int32, 0, 16) {
-              let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
-               {
-                compute_5: Buffer(compute_4, float32, [512], [])[cse_var_1] = 0f32
-                compute_5[(cse_var_1 + 1)] = 0f32
-                compute_5[(cse_var_1 + 2)] = 0f32
-                compute_5[(cse_var_1 + 3)] = 0f32
-                compute_5[(cse_var_1 + 4)] = 0f32
-                compute_5[(cse_var_1 + 5)] = 0f32
-                compute_5[(cse_var_1 + 6)] = 0f32
-                compute_5[(cse_var_1 + 7)] = 0f32
-                compute_5[(cse_var_1 + 8)] = 0f32
-                compute_5[(cse_var_1 + 9)] = 0f32
-                compute_5[(cse_var_1 + 10)] = 0f32
-                compute_5[(cse_var_1 + 11)] = 0f32
-                compute_5[(cse_var_1 + 12)] = 0f32
-                compute_5[(cse_var_1 + 13)] = 0f32
-                compute_5[(cse_var_1 + 14)] = 0f32
-                compute_5[(cse_var_1 + 15)] = 0f32
+              for (j.init: int32, 0, 16) {
+                compute_5: Buffer(compute_4, float32, [512], [])[(((i.inner.init*32) + (nb_j.inner*16)) + j.init)] = 0f32
               }
             }
-            for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+            for (elem_idx: int32, 0, let cse_var_1: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_1 + 1)] - placeholder_3[cse_var_1])) {
               for (i.inner: int32, 0, 16) {
-                let cse_var_21: int32 = (elem_idx*16)
-                let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
-                let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-                let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.inner*256))
-                let cse_var_17: int32 = (cse_var_20 + 9)
-                let cse_var_16: int32 = (cse_var_20 + 8)
-                let cse_var_15: int32 = (cse_var_20 + 7)
-                let cse_var_14: int32 = (cse_var_20 + 6)
-                let cse_var_13: int32 = (cse_var_20 + 5)
-                let cse_var_12: int32 = (cse_var_20 + 4)
-                let cse_var_11: int32 = (cse_var_20 + 3)
-                let cse_var_10: int32 = (cse_var_20 + 2)
-                let cse_var_9: int32 = (cse_var_20 + 15)
-                let cse_var_8: int32 = (cse_var_20 + 14)
-                let cse_var_7: int32 = (cse_var_20 + 13)
-                let cse_var_6: int32 = (cse_var_20 + 12)
-                let cse_var_5: int32 = (cse_var_20 + 11)
-                let cse_var_4: int32 = (cse_var_20 + 10)
-                let cse_var_3: int32 = (cse_var_20 + 1)
-                 {
-                  compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                for (j: int32, 0, 16) {
+                  let cse_var_3: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+                  let cse_var_2: int32 = (((i.inner*32) + (nb_j.inner*16)) + j)
+                  compute_5[cse_var_2] = (compute_5[cse_var_2] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + j)]*max(placeholder[(((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
                 }
               }
             }
           }
           for (i0.inner: int32, 0, 16) {
             for (i1.inner: int32, 0, 32) {
-              let cse_var_22: int32 = ((((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32)) + i1.inner)
-              compute[cse_var_22] = max((compute_5[((i0.inner*32) + i1.inner)] + placeholder_4[cse_var_22]), 0f32)
+              let cse_var_4: int32 = ((((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32)) + i1.inner)
+              compute[cse_var_4] = max((compute_5[((i0.inner*32) + i1.inner)] + placeholder_4[cse_var_4]), 0f32)
             }
           }
         }
@@ -523,7 +475,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.666 ms
+    Execution time of this operator: 1.499 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index f48d05a90..1dca9602f 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:43.246** total execution time for **how_to_tune_with_autotvm** files:
+**00:43.275** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:43.213 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:43.242 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.019 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index c4f0792d2..503fff72c 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -879,8 +879,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
-    No: 6   GFLOPS: 95.05/95.05     result: MeasureResult(costs=(0.0024355176041666667,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6161141395568848, timestamp=1656396136.7135155)      [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
-    No: 7   GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 6   GFLOPS: 96.04/96.04     result: MeasureResult(costs=(0.0024105358333333334,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.778219223022461, timestamp=1656401725.2942889)       [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+    No: 7   GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1003,7 +1003,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
-    No: 8   GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 8   GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1126,7 +1126,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
-    No: 9   GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 9   GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1249,7 +1249,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
-    No: 10  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 10  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1267,7 +1267,7 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
-    No: 11  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 11  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1390,7 +1390,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
-    No: 12  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 12  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1513,7 +1513,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
-    No: 13  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1636,7 +1636,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
-    No: 14  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1759,7 +1759,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
-    No: 15  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 15  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1882,7 +1882,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
-    No: 16  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 16  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2005,7 +2005,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
-    No: 17  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 17  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2128,7 +2128,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
-    No: 18  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 18  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2251,7 +2251,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
-    No: 19  GFLOPS: 0.00/95.05      result: Traceback (most recent call last):
+    No: 19  GFLOPS: 0.00/96.04      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 738, in __call__
         yield remote, remote.load_module(os.path.split(build_result.filename)[1])
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 702, in run_through_rpc
@@ -2339,7 +2339,7 @@ for this template
       15: _PyEval_EvalFrameDefault
       14: 0x0000000000537c30
       13: _PyObject_FastCallKeywords
-      12: 0x00007f0c20390fa2
+      12: 0x00007fb855c44fa2
       11: _ctypes_callproc
       10: ffi_call
       9: ffi_call_unix64
@@ -2404,7 +2404,7 @@ for this template
       21: _PyFunction_FastCallKeywords
       20: _PyEval_EvalFrameDefault
       19: _PyFunction_FastCall      [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
-    No: 20  GFLOPS: 142.67/142.67   result: MeasureResult(costs=(0.0016225824677419353,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.1343483924865723, timestamp=1656396162.2331307)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+    No: 20  GFLOPS: 144.95/144.95   result: MeasureResult(costs=(0.0015971210000000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.130061149597168, timestamp=1656401751.5317616)       [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
 
 
 
@@ -2461,7 +2461,7 @@ and measure running time.
     Best config:
     [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
     Finish loading 20 records
-    Time cost of this operator: 0.002012
+    Time cost of this operator: 0.001968
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 51bc05298..609c53bf9 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -328,10 +328,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.8     98.724   (1, 2, 10, 10, 3)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.141     0.991    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.901     0.284    (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             316.842   -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  311.0     98.753   (1, 2, 10, 10, 3)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.003     0.954    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.925     0.294    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             314.928   -        -                  -       -        
 
 
 
@@ -397,10 +397,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  81.15     96.795   (1, 6, 10, 10, 1)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.756     2.095    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.931     1.11     (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             83.837    -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  123.3     97.799   (1, 6, 10, 10, 1)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.825     1.448    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.95      0.754    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             126.075   -        -                  -       -        
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 2f1341320..b43527d8e 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -225,7 +225,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpe6ak382e/images/random'
+    '/tmp/tmpt3bmchec/images/random'
 
 
 
@@ -325,8 +325,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpe6ak382e/images/target contains 8144 images
-    /tmp/tmpe6ak382e/images/random contains 5000 images
+    /tmp/tmpt3bmchec/images/target contains 8144 images
+    /tmp/tmpt3bmchec/images/random contains 5000 images
 
 
 
@@ -501,13 +501,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 55s - loss: 0.2073 - accuracy: 0.9304 - val_loss: 0.1338 - val_accuracy: 0.9596
+    328/328 - 55s - loss: 0.2215 - accuracy: 0.9246 - val_loss: 0.1291 - val_accuracy: 0.9603
     Epoch 2/3
-    328/328 - 52s - loss: 0.0969 - accuracy: 0.9645 - val_loss: 0.1353 - val_accuracy: 0.9603
+    328/328 - 52s - loss: 0.0988 - accuracy: 0.9627 - val_loss: 0.1030 - val_accuracy: 0.9660
     Epoch 3/3
-    328/328 - 52s - loss: 0.0616 - accuracy: 0.9776 - val_loss: 0.1258 - val_accuracy: 0.9619
+    328/328 - 52s - loss: 0.0680 - accuracy: 0.9749 - val_loss: 0.1247 - val_accuracy: 0.9630
 
-    <keras.callbacks.History object at 0x7fb6f11f8a10>
+    <keras.callbacks.History object at 0x7f52315ef510>
 
 
 
@@ -864,7 +864,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 8 minutes  3.883 seconds)
+   **Total running time of the script:** ( 7 minutes  43.170 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 3540fb0da..16284341c 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**08:49.354** total execution time for **how_to_work_with_microtvm** files:
+**08:30.605** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 08:03.883 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 07:43.170 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:42.063 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:43.950 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.408 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.485 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.000 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 828bfd8c4..80ba26fa4 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:09.875** total execution time for **how_to_work_with_relay** files:
+**00:09.619** total execution time for **how_to_work_with_relay** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:08.148 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:08.129 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.721 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.484 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)       | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index 73a51a7f4..f620281bb 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -259,7 +259,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7fb6f1681440>
+    <function my_cuda_math_rule at 0x7f51a80b19e0>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 9aed2d6c6..57cdd80fe 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**00:03.995** total execution time for **how_to_work_with_schedules** files:
+**00:04.033** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.858 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.917 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.957 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.903 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.513 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.526 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.495 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.512 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.098 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.100 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.035 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index e33f61e0c..b883fd666 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -346,7 +346,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmp8ze5l6i5/input0.cc'\nsource_filename = \"/tmp/tmp8ze5l6i5/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpsvu4139j/input0.cc'\nsource_filename = \"/tmp/tmpsvu4139j/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 365dabbc4..38ec8bd60 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:20.192** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:20.475** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:20.186 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:20.469 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.006 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 9860662d7..fe8fd050e 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -291,7 +291,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 21.82s!
+    resnet18_v1 inference graph built in 22.59s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index e0895b3ed..e004297f3 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -335,7 +335,7 @@ The compilation steps are:
       "target_host parameter is going to be deprecated. "
     /workspace/python/tvm/relay/build_module.py:411: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 15.32s!
+    yolov3-tiny inference graph built in 15.43s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index c087d109a..fd9d23dc6 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:29.892** total execution time for **topic_vta_tutorials_frontend** files:
+**01:30.235** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:47.846 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:47.448 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:42.045 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:42.787 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 8fa913c62..7e17c9d00 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.201** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.236** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.822 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.851 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.380 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.385 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index c94521dc8..4d93ede83 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.682** total execution time for **topic_vta_tutorials** files:
+**00:00.713** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.367 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.382 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.316 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.331 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 6785a0874..7fe415115 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -327,7 +327,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 93.200 ms
+    Execution time of this operator: 93.240 ms
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index 4877b8962..e0729e2ac 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -449,16 +449,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 10.76/10.76     result: MeasureResult(costs=(0.0249553986,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5325510501861572, timestamp=1656395021.79173) [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
-    No: 2   GFLOPS: 2.94/10.76      result: MeasureResult(costs=(0.0914591624,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6135313510894775, timestamp=1656395023.4178717)       [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
-    No: 3   GFLOPS: 11.87/11.87     result: MeasureResult(costs=(0.022621877000000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5592312812805176, timestamp=1656395024.4617643)       [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
-    No: 4   GFLOPS: 1.86/11.87      result: MeasureResult(costs=(0.14416801499999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.423171043395996, timestamp=1656395027.4525702) [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
-    No: 5   GFLOPS: 3.67/11.87      result: MeasureResult(costs=(0.07316351499999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3100175857543945, timestamp=1656395028.8931713)        [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
-    No: 6   GFLOPS: 1.72/11.87      result: MeasureResult(costs=(0.1559495764,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6608426570892334, timestamp=1656395031.5995502)       [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
-    No: 7   GFLOPS: 0.87/11.87      result: MeasureResult(costs=(0.30922754599999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.062238693237305, timestamp=1656395037.2297213) [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 8   GFLOPS: 10.65/11.87     result: MeasureResult(costs=(0.0252154422,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5494849681854248, timestamp=1656395037.7954605)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
-    No: 9   GFLOPS: 1.67/11.87      result: MeasureResult(costs=(0.16062415840000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6897850036621094, timestamp=1656395040.607338) [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
-    No: 10  GFLOPS: 2.61/11.87      result: MeasureResult(costs=(0.10280037099999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7690143585205078, timestamp=1656395042.4161282)        [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
+    No: 1   GFLOPS: 9.75/9.75       result: MeasureResult(costs=(0.027534251599999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5763068199157715, timestamp=1656399879.77563) [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 2   GFLOPS: 2.72/9.75       result: MeasureResult(costs=(0.098577908,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7136471271514893, timestamp=1656399881.515282) [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
+    No: 3   GFLOPS: 11.85/11.85     result: MeasureResult(costs=(0.022658018999999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5652670860290527, timestamp=1656399882.5541406)       [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
+    No: 4   GFLOPS: 1.85/11.85      result: MeasureResult(costs=(0.1448950412,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.4405906200408936, timestamp=1656399885.564221)        [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
+    No: 5   GFLOPS: 3.68/11.85      result: MeasureResult(costs=(0.0730119178,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.305342435836792, timestamp=1656399887.5210543)        [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
+    No: 6   GFLOPS: 1.86/11.85      result: MeasureResult(costs=(0.144430656,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.472351551055908, timestamp=1656399890.0386963) [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
+    No: 7   GFLOPS: 0.85/11.85      result: MeasureResult(costs=(0.3170945812,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.187531471252441, timestamp=1656399895.273347) [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 8   GFLOPS: 10.72/11.85     result: MeasureResult(costs=(0.0250315392,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.542952299118042, timestamp=1656399895.8364944)        [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
+    No: 9   GFLOPS: 1.60/11.85      result: MeasureResult(costs=(0.167470595,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.7763662338256836, timestamp=1656399898.7313275)        [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
+    No: 10  GFLOPS: 2.54/11.85      result: MeasureResult(costs=(0.1056889338,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7943274974822998, timestamp=1656399900.5846884)       [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index f46254a53..8d04b2cbf 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -314,7 +314,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 491.2560203000021, 'median': 491.31815010000537, 'std': 0.38931689956801807}
+    {'mean': 494.2233797100016, 'median': 493.65905760000715, 'std': 1.3174742070766543}
 
 
 
@@ -550,31 +550,31 @@ the tuning data to.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.55/  17.55 GFLOPS | Progress: (4/20) | 6.14 s
    [Task  1/25]  Current/Best:    6.15/  17.55 GFLOPS | Progress: (8/20) | 9.03 s
    [Task  1/25]  Current/Best:   11.46/  22.83 GFLOPS | Progress: (12/20) | 11.43 s
    [Task  1/25]  Current/Best:   16.85/  22.83 GFLOPS | Progress: (16/20) | 13.12 s
    [Task  1/25]  Current/Best:   11.55/  23.96 GFLOPS | Progress: (20/20) | 14.84 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.24/  12.89 GFLOPS | Progress: (4/20) | 3.72 s
    [Task  2/25]  Current/Best:   12.97/  18.57 GFLOPS | Progress: (8/20) | 5.01 s
    [Task  2/25]  Current/Best:   21.16/  21.16 GFLOPS | Progress: (12/20) | 6.36 s
    [Task  2/25]  Current/Best:   12.33/  21.16 GFLOPS | Progress: (16/20) | 7.62 s
    [Task  2/25]  Current/Best:   19.16/  21.16 GFLOPS | Progress: (20/20) | 9.21 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.59 GFLOPS | Progress: (4/20) | 5.82 s
    [Task  3/25]  Current/Best:   15.59/  16.90 GFLOPS | Progress: (8/20) | 7.75 s
    [Task  3/25]  Current/Best:   14.93/  16.90 GFLOPS | Progress: (12/20) | 9.45 s
    [Task  3/25]  Current/Best:    7.23/  23.73 GFLOPS | Progress: (16/20) | 11.34 s
    [Task  3/25]  Current/Best:   12.64/  23.73 GFLOPS | Progress: (20/20) | 15.81 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.53/  20.47 GFLOPS | Progress: (4/20) | 2.36 s
    [Task  4/25]  Current/Best:    6.89/  20.47 GFLOPS | Progress: (8/20) | 6.64 s
    [Task  4/25]  Current/Best:   22.25/  22.25 GFLOPS | Progress: (12/20) | 11.03 s
    [Task  4/25]  Current/Best:   17.44/  22.25 GFLOPS | Progress: (16/20) | 13.21 s
    [Task  4/25]  Current/Best:   13.47/  22.25 GFLOPS | Progress: (20/20) | 15.22 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.58/  10.35 GFLOPS | Progress: (4/20) | 2.55 s
    [Task  5/25]  Current/Best:   11.70/  12.70 GFLOPS | Progress: (8/20) | 4.60 s
    [Task  5/25]  Current/Best:   11.67/  17.96 GFLOPS | Progress: (12/20) | 7.68 s
    [Task  5/25]  Current/Best:   11.77/  22.76 GFLOPS | Progress: (16/20) | 9.13 s
    [Task  5/25]  Current/Best:   12.08/  22.76 GFLOPS | Progress: (20/20) | 10.98 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.25/  20.63 GFLOPS | Progress: (4/20) | 3.95 s
    [Task  6/25]  Current/Best:   18.97/  20.63 GFLOPS | Progress: (8/20) | 5.70 s
    [Task  6/25]  Current/Best:   13.26/  20.63 GFLOPS | Progress: (12/20) | 7.62 s
    [Task  6/25]  Current/Best:   20.03/  20.63 GFLOPS | Progress: (16/20) | 9.83 s
    [Task  6/25]  Current/Best:    3.73/  20.63 GFLOPS | Progress: (20/20) | 12.34 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.24/  12.92 GFLOPS | Progress: (4/20) | 3.60 s
    [Task  7/25]  Current/Best:   20.29/  21.03 GFLOPS | Progress: (8/20) | 5.11 s
    [Task  7/25]  Current/Best:   16.10/  21.03 GFLOPS | Progress: (12/20) | 7.02 s
    [Task  7/25]  Current/Best:   12.20/  21.03 GFLOPS | Progress: (16/20) | 9.06 s
    [Task  7/25]  Current/Best:    6.32/  21.66 GFLOPS | Progress: (20/20) | 11.52 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.12/  13.83 GFLOPS | Progress: (4/20) | 2.88 s
    [Task  8/25]  Current/Best:    9.45/  13.83 GFLOPS | Progress: (8/20) | 7.65 s
    [Task  8/25]  Current/Best:   12.32/  13.83 GFLOPS | Progress: (12/20) | 13.71 s
    [Task  8/25]  Current/Best:   18.69/  18.69 GFLOPS | Progress: (16/20) | 15.81 s
    [Task  8/25]  Current/Best:   20.08/  20.08 GFLOPS | Progress: (20/20) | 22.28 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.43/  15.71 GFLOPS | Progress: (4/20) | 11.92 s
    [Task  9/25]  Current/Best:   23.54/  23.54 GFLOPS | Progress: (8/20) | 13.62 s
    [Task  9/25]  Current/Best:    8.28/  23.54 GFLOPS | Progress: (12/20) | 15.94 s
    [Task  9/25]  Current/Best:   17.56/  23.54 GFLOPS | Progress: (16/20) | 18.60 s
    [Task  9/25]  Current/Best:    9.06/  23.54 GFLOPS | Progress: (20/20) | 26.15 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.23/  18.23 GFLOPS | Progress: (4/20) | 2.56 s
    [Task 10/25]  Current/Best:   15.53/  18.23 GFLOPS | Progress: (8/20) | 4.14 s
    [Task 10/25]  Current/Best:   12.60/  18.92 GFLOPS | Progress: (12/20) | 5.66 s
    [Task 10/25]  Current/Best:   19.03/  20.32 GFLOPS | Progress: (16/20) | 6.76 s
    [Task 10/25]  Current/Best:    8.88/  20.32 GFLOPS | Progress: (20/20
 ) | 8.29 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.16/  18.14 GFLOPS | Progress: (4/20) | 3.32 s
    [Task 11/25]  Current/Best:   16.93/  18.14 GFLOPS | Progress: (8/20) | 6.05 s
    [Task 11/25]  Current/Best:   18.07/  18.14 GFLOPS | Progress: (12/20) | 8.06 s
    [Task 11/25]  Current/Best:   13.37/  21.10 GFLOPS | Progress: (16/20) | 10.83 s
    [Task 11/25]  Current/Best:   19.45/  21.63 GFLOPS | Progress: (20/20) | 12.85 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.85/  17.96 GFLOPS | Progress: (4/20) | 5.29 s
    [Task 12/25]  Current/Best:    5.18/  17.96 GFLOPS | Progress: (8/20) | 8.97 s
    [Task 12/25]  Current/Best:   18.91/  18.91 GFLOPS | Progress: (12/20) | 10.95 s
    [Task 12/25]  Current/Best:   15.46/  18.91 GFLOPS | Progress: (16/20) | 13.70 s
    [Task 12/25]  Current/Best:   15.16/  18.91 GFLOPS | Progress: (20/20) | 15.60 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.69/  17.32 GFLOPS | Progress: (4/20) | 3.61 s
    [Task 13/25]  Current/Best:   16.11/  21.00 GFLOPS | Progress: (8/20) | 6.04 s
    [Task 13/25]  Current/Best:   19.58/  21.59 GFLOPS | Progress: (12/20) | 8.90 s
    [Task 13/25]  Current/Best:   12.27/  21.59 GFLOPS | Progress: (16/20) | 12.23 s
    [Task 13/25]  Current/Best:   18.56/  21.59 GFLOPS | Progress: (20/20) | 14.49 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.59/  13.59 GFLOPS | Progress: (4/20) | 3.22 s
    [Task 14/25]  Current/Best:    6.00/  13.59 GFLOPS | Progress: (8/20) | 5.36 s
    [Task 14/25]  Current/Best:   21.20/  21.20 GFLOPS | Progress: (12/20) | 7.92 s
    [Task 14/25]  Current/Best:   16.25/  21.20 GFLOPS | Progress: (16/20) | 9.56 s Done.
-
    [Task 14/25]  Current/Best:   17.33/  21.20 GFLOPS | Progress: (20/20) | 11.29 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.11/  17.60 GFLOPS | Progress: (4/20) | 2.67 s
    [Task 15/25]  Current/Best:   14.46/  18.10 GFLOPS | Progress: (8/20) | 3.95 s
    [Task 15/25]  Current/Best:   10.40/  22.29 GFLOPS | Progress: (12/20) | 6.10 s
    [Task 15/25]  Current/Best:   20.39/  22.29 GFLOPS | Progress: (16/20) | 9.09 s
    [Task 15/25]  Current/Best:    9.71/  22.29 GFLOPS | Progress: (20/20) | 10.06 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   19.46/  19.46 GFLOPS | Progress: (4/20) | 3.01 s
    [Task 16/25]  Current/Best:    3.04/  19.46 GFLOPS | Progress: (8/20) | 4.62 s
    [Task 16/25]  Current/Best:   19.18/  19.46 GFLOPS | Progress: (12/20) | 5.83 s
    [Task 16/25]  Current/Best:   18.02/  19.46 GFLOPS | Progress: (16/20) |
  7.18 s
    [Task 16/25]  Current/Best:   10.02/  21.90 GFLOPS | Progress: (20/20) | 9.20 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.11/  18.77 GFLOPS | Progress: (4/20) | 4.66 s
    [Task 17/25]  Current/Best:   14.46/  23.38 GFLOPS | Progress: (8/20) | 7.38 s
    [Task 17/25]  Current/Best:   16.92/  23.38 GFLOPS | Progress: (12/20) | 9.46 s
    [Task 17/25]  Current/Best:   16.56/  23.38 GFLOPS | Progress: (16/20) | 11.56 s
    [Task 17/25]  Current/Best:   10.06/  23.38 GFLOPS | Progress: (20/20) | 13.66 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.23/  18.05 GFLOPS | Progress: (4/20) | 3.64 s
    [Task 18/25]  Current/Best:   10.61/  19.23 GFLOPS | Progress: (8/20) | 7.05 s
    [Task 18/25]  Current/Best:   19.40/  19.40 GFLOPS | Progress: (12/20) | 8.96 s
    [Task 18/25]  Current/Best:   10.11/  19.40 GFLOPS | Progress: (16/20) | 12.47 s
    [Task 18/25]  Current/Best:   20.16/  20.16 GFLOPS | Progress: (20/20) | 13.98 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.16/  20.45 GFLOPS | Progress: (4/20) | 5.91 s
    [Task 19/25]  Current/Best:    2.61/  20.45 GFLOPS | Progress: (8/20) | 9.20 s
    [Task 19/25]  Current/Best:   20.14/  21.89 GFLOPS | Progress: (12/20) | 11.99 s
    [Task 19/25]  Current/Best:   14.06/  21.89 GFLOPS | Progress: (16/20) | 14.85 s
    [Task 19/25]  Current/Best:    2.70/  23.78 GFLOPS | Progress: (20/20) | 17.70 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.15/  15.07 GFLOPS | Progress: (4/20) | 3.27 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.50/  17.50 GFLOPS | Progress: (4/20) | 6.33 s
    [Task  1/25]  Current/Best:    6.16/  17.50 GFLOPS | Progress: (8/20) | 9.22 s
    [Task  1/25]  Current/Best:   11.56/  22.83 GFLOPS | Progress: (12/20) | 11.67 s
    [Task  1/25]  Current/Best:   16.89/  22.83 GFLOPS | Progress: (16/20) | 13.34 s
    [Task  1/25]  Current/Best:   11.61/  23.79 GFLOPS | Progress: (20/20) | 15.09 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.33/  13.13 GFLOPS | Progress: (4/20) | 3.70 s
    [Task  2/25]  Current/Best:   13.88/  18.67 GFLOPS | Progress: (8/20) | 4.99 s
    [Task  2/25]  Current/Best:   20.78/  20.78 GFLOPS | Progress: (12/20) | 6.30 s
    [Task  2/25]  Current/Best:   12.22/  20.78 GFLOPS | Progress: (16/20) | 7.61 s
    [Task  2/25]  Current/Best:   18.93/  20.78 GFLOPS | Progress: (20/20) | 9.20 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.39 GFLOPS | Progress: (4/20) | 5.83 s
    [Task  3/25]  Current/Best:   15.52/  16.84 GFLOPS | Progress: (8/20) | 7.77 s
    [Task  3/25]  Current/Best:   14.90/  16.84 GFLOPS | Progress: (12/20) | 9.53 s
    [Task  3/25]  Current/Best:    7.19/  23.54 GFLOPS | Progress: (16/20) | 11.44 s
    [Task  3/25]  Current/Best:   12.71/  23.54 GFLOPS | Progress: (20/20) | 16.04 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.57/  20.50 GFLOPS | Progress: (4/20) | 2.36 s
    [Task  4/25]  Current/Best:    6.87/  20.50 GFLOPS | Progress: (8/20) | 7.05 s
    [Task  4/25]  Current/Best:   21.87/  21.87 GFLOPS | Progress: (12/20) | 11.96 s
    [Task  4/25]  Current/Best:   16.99/  21.87 GFLOPS | Progress: (16/20) | 14.36 s
    [Task  4/25]  Current/Best:   13.42/  21.87 GFLOPS | Progress: (20/20) | 16.45 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.57/  10.36 GFLOPS | Progress: (4/20) | 2.57 s
    [Task  5/25]  Current/Best:   11.70/  12.72 GFLOPS | Progress: (8/20) | 4.62 s
    [Task  5/25]  Current/Best:   11.45/  18.05 GFLOPS | Progress: (12/20) | 7.67 s
    [Task  5/25]  Current/Best:   11.76/  22.66 GFLOPS | Progress: (16/20) | 9.08 s
    [Task  5/25]  Current/Best:   11.53/  22.66 GFLOPS | Progress: (20/20) | 11.01 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.26/  20.63 GFLOPS | Progress: (4/20) | 4.10 s
    [Task  6/25]  Current/Best:   18.98/  20.63 GFLOPS | Progress: (8/20) | 5.85 s
    [Task  6/25]  Current/Best:   13.34/  20.63 GFLOPS | Progress: (12/20) | 7.79 s
    [Task  6/25]  Current/Best:   20.00/  20.63 GFLOPS | Progress: (16/20) | 10.04 s
    [Task  6/25]  Current/Best:    3.73/  20.63 GFLOPS | Progress: (20/20) | 12.58 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.24/  12.92 GFLOPS | Progress: (4/20) | 3.51 s
    [Task  7/25]  Current/Best:   20.31/  21.24 GFLOPS | Progress: (8/20) | 5.01 s
    [Task  7/25]  Current/Best:   16.17/  21.24 GFLOPS | Progress: (12/20) | 6.95 s
    [Task  7/25]  Current/Best:   12.29/  21.24 GFLOPS | Progress: (16/20) | 8.99 s
    [Task  7/25]  Current/Best:    6.41/  21.66 GFLOPS | Progress: (20/20) | 11.44 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.40/  14.43 GFLOPS | Progress: (4/20) | 2.91 s
    [Task  8/25]  Current/Best:    9.61/  14.43 GFLOPS | Progress: (8/20) | 7.90 s
    [Task  8/25]  Current/Best:   12.83/  14.43 GFLOPS | Progress: (12/20) | 14.32 s
    [Task  8/25]  Current/Best:   18.64/  18.64 GFLOPS | Progress: (16/20) | 16.40 s
    [Task  8/25]  Current/Best:   19.99/  19.99 GFLOPS | Progress: (20/20) | 23.38 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.36/  15.53 GFLOPS | Progress: (4/20) | 11.95 s
    [Task  9/25]  Current/Best:   23.31/  23.31 GFLOPS | Progress: (8/20) | 13.80 s
    [Task  9/25]  Current/Best:    8.30/  23.31 GFLOPS | Progress: (12/20) | 16.30 s
    [Task  9/25]  Current/Best:   18.00/  23.31 GFLOPS | Progress: (16/20) | 19.10 s
    [Task  9/25]  Current/Best:    9.07/  23.31 GFLOPS | Progress: (20/20) | 27.43 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   17.89/  17.89 GFLOPS | Progress: (4/20) | 2.51 s
    [Task 10/25]  Current/Best:   15.41/  17.89 GFLOPS | Progress: (8/20) | 4.13 s
    [Task 10/25]  Current/Best:   12.20/  19.05 GFLOPS | Progress: (12/20) | 5.68 s
    [Task 10/25]  Current/Best:   19.02/  20.62 GFLOPS | Progress: (16/20) | 6.79 s
    [Task 10/25]  Current/Best:    8.83/  20.62 GFLOPS | Progress: (20/20
 ) | 8.35 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.30/  18.12 GFLOPS | Progress: (4/20) | 3.38 s
    [Task 11/25]  Current/Best:   16.86/  18.12 GFLOPS | Progress: (8/20) | 6.22 s
    [Task 11/25]  Current/Best:   17.67/  18.12 GFLOPS | Progress: (12/20) | 8.26 s
    [Task 11/25]  Current/Best:   13.55/  21.18 GFLOPS | Progress: (16/20) | 11.12 s
    [Task 11/25]  Current/Best:   19.50/  21.56 GFLOPS | Progress: (20/20) | 13.20 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.82/  18.06 GFLOPS | Progress: (4/20) | 5.69 s
    [Task 12/25]  Current/Best:    5.33/  18.06 GFLOPS | Progress: (8/20) | 9.57 s
    [Task 12/25]  Current/Best:   19.20/  19.20 GFLOPS | Progress: (12/20) | 11.53 s
    [Task 12/25]  Current/Best:   15.27/  19.20 GFLOPS | Progress: (16/20) | 14.46 s
    [Task 12/25]  Current/Best:   15.12/  19.20 GFLOPS | Progress: (20/20) | 16.37 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.99/  17.37 GFLOPS | Progress: (4/20) | 3.76 s
    [Task 13/25]  Current/Best:   15.66/  21.06 GFLOPS | Progress: (8/20) | 6.35 s
    [Task 13/25]  Current/Best:   19.54/  21.49 GFLOPS | Progress: (12/20) | 9.36 s
    [Task 13/25]  Current/Best:   12.24/  21.49 GFLOPS | Progress: (16/20) | 12.82 s
    [Task 13/25]  Current/Best:   18.73/  21.49 GFLOPS | Progress: (20/20) | 15.11 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.52/  13.52 GFLOPS | Progress: (4/20) | 3.32 s
    [Task 14/25]  Current/Best:    6.07/  13.52 GFLOPS | Progress: (8/20) | 5.50 s
    [Task 14/25]  Current/Best:   20.40/  20.40 GFLOPS | Progress: (12/20) | 8.16 s
    [Task 14/25]  Current/Best:   17.05/  20.40 GFLOPS | Progress: (16/20) | 9.84 s Done.
+
    [Task 14/25]  Current/Best:   17.40/  20.40 GFLOPS | Progress: (20/20) | 11.56 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.11/  17.65 GFLOPS | Progress: (4/20) | 2.75 s
    [Task 15/25]  Current/Best:   14.39/  17.94 GFLOPS | Progress: (8/20) | 4.05 s
    [Task 15/25]  Current/Best:   10.35/  22.33 GFLOPS | Progress: (12/20) | 6.27 s
    [Task 15/25]  Current/Best:   20.39/  22.33 GFLOPS | Progress: (16/20) | 9.87 s
    [Task 15/25]  Current/Best:    9.61/  22.33 GFLOPS | Progress: (20/20) | 10.89 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.45/  20.45 GFLOPS | Progress: (4/20) | 3.00 s
    [Task 16/25]  Current/Best:    3.03/  20.45 GFLOPS | Progress: (8/20) | 4.62 s
    [Task 16/25]  Current/Best:   18.88/  20.45 GFLOPS | Progress: (12/20) | 5.84 s
    [Task 16/25]  Current/Best:   17.71/  20.45 GFLOPS | Progress: (16/20) |
  7.20 s
    [Task 16/25]  Current/Best:   10.05/  21.77 GFLOPS | Progress: (20/20) | 9.34 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.58/  18.84 GFLOPS | Progress: (4/20) | 4.77 s
    [Task 17/25]  Current/Best:   14.40/  23.00 GFLOPS | Progress: (8/20) | 7.70 s
    [Task 17/25]  Current/Best:   16.72/  23.00 GFLOPS | Progress: (12/20) | 9.78 s
    [Task 17/25]  Current/Best:   16.53/  23.00 GFLOPS | Progress: (16/20) | 11.97 s
    [Task 17/25]  Current/Best:   10.04/  23.00 GFLOPS | Progress: (20/20) | 14.13 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.47/  17.82 GFLOPS | Progress: (4/20) | 3.79 s
    [Task 18/25]  Current/Best:   10.58/  17.82 GFLOPS | Progress: (8/20) | 7.45 s
    [Task 18/25]  Current/Best:   19.44/  19.44 GFLOPS | Progress: (12/20) | 9.38 s
    [Task 18/25]  Current/Best:    9.95/  19.44 GFLOPS | Progress: (16/20) | 13.22 s
    [Task 18/25]  Current/Best:   20.55/  20.55 GFLOPS | Progress: (20/20) | 14.73 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.17/  20.11 GFLOPS | Progress: (4/20) | 6.02 s
    [Task 19/25]  Current/Best:    2.60/  20.11 GFLOPS | Progress: (8/20) | 9.36 s
    [Task 19/25]  Current/Best:   18.64/  20.84 GFLOPS | Progress: (12/20) | 12.29 s
    [Task 19/25]  Current/Best:   15.08/  21.48 GFLOPS | Progress: (16/20) | 15.28 s
    [Task 19/25]  Current/Best:    2.70/  23.15 GFLOPS | Progress: (20/20) | 18.06 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.38/  15.41 GFLOPS | Progress: (4/20) | 3.28 s Done.
      Done.
-
    [Task 20/25]  Current/Best:    9.81/  15.07 GFLOPS | Progress: (8/20) | 6.71 s
    [Task 20/25]  Current/Best:    2.32/  16.65 GFLOPS | Progress: (12/20) | 10.57 s
    [Task 20/25]  Current/Best:   12.36/  16.65 GFLOPS | Progress: (16/20) | 14.09 s
    [Task 20/25]  Current/Best:   11.74/  22.31 GFLOPS | Progress: (20/20) | 16.19 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.41/  17.71 GFLOPS | Progress: (4/20) | 3.18 s
    [Task 21/25]  Current/Best:   14.64/  17.71 GFLOPS | Progress: (8/20) | 4.70 s
    [Task 21/25]  Current/Best:    1.61/  17.71 GFLOPS | Progress: (12/20) | 6.82 s
    [Task 21/25]  Current/Best:   17.97/  17.97 GFLOPS | Progress: (16/20) | 10.21 s
    [Task 21/25]  Current/Best:    4.44/  17.97 GFLOPS | Progress: (20/20) | 17.25 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  17.02 GFLOPS | Progress: (4/20
 ) | 2.64 s
    [Task 22/25]  Current/Best:    8.65/  21.98 GFLOPS | Progress: (8/20) | 4.60 s
    [Task 22/25]  Current/Best:   19.83/  21.98 GFLOPS | Progress: (12/20) | 6.87 s
    [Task 22/25]  Current/Best:   15.40/  21.98 GFLOPS | Progress: (16/20) | 8.89 s
    [Task 22/25]  Current/Best:   13.94/  21.98 GFLOPS | Progress: (20/20) | 10.59 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.57/  20.89 GFLOPS | Progress: (4/20) | 3.20 s
    [Task 23/25]  Current/Best:   14.49/  20.89 GFLOPS | Progress: (8/20) | 6.56 s
    [Task 23/25]  Current/Best:   21.02/  21.75 GFLOPS | Progress: (12/20) | 8.34 s
    [Task 23/25]  Current/Best:    6.50/  21.75 GFLOPS | Progress: (16/20) | 15.29 s
    [Task 23/25]  Current/Best:    7.90/  21.75 GFLOPS | Progress: (20/20) | 19.45 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.44/   8.44 GFLOPS | Progress: (4/20) | 11.76 s
    [Task 24/25]  Current/Best:    3.56/   8.44 GFLOPS | Progress: (8/20) | 22.96 s
    [Task 24/25]  Current/Best:    4.28/   8.44 GFLOPS | Progress: (12/20) | 33.68 s Done.
+
    [Task 20/25]  Current/Best:   10.30/  15.41 GFLOPS | Progress: (8/20) | 6.67 s
    [Task 20/25]  Current/Best:    2.32/  15.87 GFLOPS | Progress: (12/20) | 10.64 s
    [Task 20/25]  Current/Best:   12.28/  15.87 GFLOPS | Progress: (16/20) | 14.50 s
    [Task 20/25]  Current/Best:   13.59/  21.96 GFLOPS | Progress: (20/20) | 16.63 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.40/  17.67 GFLOPS | Progress: (4/20) | 3.30 s
    [Task 21/25]  Current/Best:   14.56/  17.67 GFLOPS | Progress: (8/20) | 4.92 s
    [Task 21/25]  Current/Best:    1.61/  17.67 GFLOPS | Progress: (12/20) | 7.07 s
    [Task 21/25]  Current/Best:   18.13/  18.13 GFLOPS | Progress: (16/20) | 10.61 s
    [Task 21/25]  Current/Best:    4.47/  18.13 GFLOPS | Progress: (20/20) | 17.86 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  17.01 GFLOPS | Progress: (4/20
 ) | 2.69 s
    [Task 22/25]  Current/Best:    9.21/  21.99 GFLOPS | Progress: (8/20) | 4.70 s
    [Task 22/25]  Current/Best:   19.90/  21.99 GFLOPS | Progress: (12/20) | 7.05 s
    [Task 22/25]  Current/Best:   14.75/  21.99 GFLOPS | Progress: (16/20) | 9.16 s
    [Task 22/25]  Current/Best:   14.67/  21.99 GFLOPS | Progress: (20/20) | 10.91 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.41/  20.26 GFLOPS | Progress: (4/20) | 3.26 s
    [Task 23/25]  Current/Best:   14.94/  20.26 GFLOPS | Progress: (8/20) | 6.73 s
    [Task 23/25]  Current/Best:   20.73/  21.45 GFLOPS | Progress: (12/20) | 8.58 s
    [Task 23/25]  Current/Best:    6.38/  21.45 GFLOPS | Progress: (16/20) | 15.62 s
    [Task 23/25]  Current/Best:    7.86/  21.45 GFLOPS | Progress: (20/20) | 19.85 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.25/   8.25 GFLOPS | Progress: (4/20) | 11.73 s
    [Task 24/25]  Current/Best:    3.50/   8.25 GFLOPS | Progress: (8/20) | 23.01 s
    [Task 24/25]  Current/Best:    4.45/   8.25 GFLOPS | Progress: (12/20) | 33.72 s Done.
      Done.
-
    [Task 24/25]  Current/Best:    6.75/   8.78 GFLOPS | Progress: (16/20) | 39.08 s
    [Task 24/25]  Current/Best:    3.41/   8.78 GFLOPS | Progress: (20/20) | 44.88 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.73 GFLOPS | Progress: (4/20) | 11.58 s
    [Task 25/25]  Current/Best:    6.16/   8.32 GFLOPS | Progress: (8/20) | 22.82 s
    [Task 25/25]  Current/Best:    5.89/   8.32 GFLOPS | Progress: (12/20) | 34.08 s
    [Task 25/25]  Current/Best:    5.75/   9.10 GFLOPS | Progress: (16/20) | 35.93 s
    [Task 25/25]  Current/Best:    2.83/   9.10 GFLOPS | Progress: (20/20) | 46.62 s
+
    [Task 24/25]  Current/Best:    7.15/   8.99 GFLOPS | Progress: (16/20) | 39.36 s
    [Task 24/25]  Current/Best:    3.26/   9.09 GFLOPS | Progress: (20/20) | 45.37 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.94 GFLOPS | Progress: (4/20) | 11.57 s
    [Task 25/25]  Current/Best:    6.08/   8.32 GFLOPS | Progress: (8/20) | 22.85 s
    [Task 25/25]  Current/Best:    5.96/   8.32 GFLOPS | Progress: (12/20) | 34.36 s
    [Task 25/25]  Current/Best:    5.86/   9.14 GFLOPS | Progress: (16/20) | 36.08 s
    [Task 25/25]  Current/Best:    2.93/   9.18 GFLOPS | Progress: (20/20) | 46.78 s
 
 
 
@@ -735,8 +735,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 411.16385403000095, 'median': 410.9197546500127, 'std': 0.7949654033282724}
-    unoptimized: {'mean': 491.2560203000021, 'median': 491.31815010000537, 'std': 0.38931689956801807}
+    optimized: {'mean': 412.91961946000356, 'median': 412.911717399993, 'std': 1.218757633834169}
+    unoptimized: {'mean': 494.2233797100016, 'median': 493.65905760000715, 'std': 1.3174742070766543}
 
 
 
@@ -759,7 +759,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 10 minutes  8.140 seconds)
+   **Total running time of the script:** ( 10 minutes  21.163 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 8a0693263..4ebdfa9da 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -269,7 +269,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.249e-07 secs/op
+    1.31e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index 13210dad0..8cbbbea30 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -262,7 +262,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x15e5f680)), stage(b, placeholder(b, 0x9d2e740)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
+    [stage(a, placeholder(a, 0xca59e00)), stage(b, placeholder(b, 0x16dece80)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index 4d002a5cc..04044de8c 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,30 +5,30 @@
 
 Computation times
 =================
-**12:51.660** total execution time for **tutorial** files:
+**13:02.518** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:08.140 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:21.163 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:59.193 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:59.937 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 00:50.801 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 00:47.056 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:28.145 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:28.618 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.046 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.375 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.688 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.687 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.509 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.513 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.138 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.169 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.000 | 0.0 MB |
-+------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.000 | 0.0 MB |
++------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 87bf92009..b6f938622 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -288,7 +288,7 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000009
+    Numpy running time: 0.000008
     naive: 0.000006
 
 
@@ -390,7 +390,7 @@ compile and run this new schedule with the parallel operation applied:
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallel: 0.000007
+    parallel: 0.000008
 
 
 
@@ -447,7 +447,7 @@ factor to be the number of threads on your CPU.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    vector: 0.000025
+    vector: 0.000026
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [(stride: int32*n: int32)], [], type="auto"),
@@ -499,10 +499,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    8.638790000077279e-06                    1.0
-                   naive              6.0278e-06      0.6977597557002865
-                parallel              6.9339e-06      0.8026471299728286
-                  vector    2.4649299999999998e-05      2.85332783871115
+                   numpy    8.112189998428221e-06                    1.0
+                   naive    5.8665000000000005e-06    0.7231709317874292
+                parallel              7.8095e-06       0.962687018118798
+                  vector             2.56735e-05      3.1648050655833226
 
 
 
@@ -923,7 +923,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.019097
+    Numpy running time: 0.018615
 
 
 
@@ -983,7 +983,7 @@ optimizations.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    none: 3.264315
+    none: 3.338917
 
 
 
@@ -1088,7 +1088,7 @@ schedule.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    blocking: 0.309419
+    blocking: 0.304365
 
 
 
@@ -1186,7 +1186,7 @@ already cache friendly from our previous optimizations.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    vectorization: 0.342478
+    vectorization: 0.339117
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1262,7 +1262,7 @@ more cache friendly.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    loop permutation: 0.116282
+    loop permutation: 0.116352
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1363,7 +1363,7 @@ optimized schedule.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    array packing: 0.109478
+    array packing: 0.108190
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1458,7 +1458,7 @@ to `C` when all the block results are ready.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    block caching: 0.111235
+    block caching: 0.110560
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1546,7 +1546,7 @@ of thread-level parallelization.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallelization: 0.144030
+    parallelization: 0.145880
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1627,13 +1627,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none            3.2643151754                     1.0
-                blocking     0.30941907280000003     0.09478835718186578
-           vectorization     0.34247802250000003      0.1049157339588184
-        loop permutation     0.11628221949999999     0.03562224027149924
-           array packing            0.1094781539     0.03353786261971008
-           block caching     0.11123485749999999     0.03407601641479658
-         parallelization            0.1440301446     0.04412262200825964
+                    none            3.3389173399                     1.0
+                blocking     0.30436526659999996      0.0911568738054221
+           vectorization     0.33911735190000003      0.1015650635754153
+        loop permutation            0.1163522494     0.03484729855680845
+           array packing     0.10819025600000001    0.032402795573022565
+           block caching            0.1105603445    0.033112633001963224
+         parallelization            0.1458796636     0.04369070831935272
 
 
 
diff --git a/docs/commit_hash b/docs/commit_hash
index bda59f0fd..bdd4d6642 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-160b1ba4bd70871433daf0af5cc91283960bd949
+d4be49aec62299275565066b56a0555bafc2ccac
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 35fb2a6dd..bdb0e6364 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -422,7 +422,7 @@ to download the full example code</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipad910f60-0d6a-4f9e-be31-d60e12c805a4 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip150fb657-ad5f-41ad-b546-fc435a8b919b from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index df6729837..6836a6c54 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -427,40 +427,3308 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
-  0%|          | 16.0k/41.5M [00:00&lt;08:06, 89.3kB/s]
-  0%|          | 48.0k/41.5M [00:00&lt;05:07, 141kB/s]
-  0%|          | 96.0k/41.5M [00:00&lt;03:38, 199kB/s]
-  0%|          | 160k/41.5M [00:00&lt;02:46, 261kB/s]
-  1%|          | 320k/41.5M [00:00&lt;01:28, 487kB/s]
-  2%|1         | 648k/41.5M [00:01&lt;00:45, 941kB/s]
-  3%|3         | 1.27M/41.5M [00:01&lt;00:23, 1.82MB/s]
-  6%|6         | 2.55M/41.5M [00:01&lt;00:11, 3.55MB/s]
- 10%|9         | 4.05M/41.5M [00:01&lt;00:07, 5.12MB/s]
- 13%|#3        | 5.56M/41.5M [00:01&lt;00:06, 6.19MB/s]
- 17%|#7        | 7.07M/41.5M [00:02&lt;00:05, 6.92MB/s]
- 21%|##        | 8.58M/41.5M [00:02&lt;00:04, 7.42MB/s]
- 24%|##4       | 10.1M/41.5M [00:02&lt;00:04, 7.74MB/s]
- 28%|##7       | 11.6M/41.5M [00:02&lt;00:03, 8.00MB/s]
- 32%|###1      | 13.1M/41.5M [00:02&lt;00:03, 8.17MB/s]
- 35%|###5      | 14.6M/41.5M [00:02&lt;00:03, 8.28MB/s]
- 39%|###8      | 16.1M/41.5M [00:03&lt;00:03, 8.35MB/s]
- 42%|####2     | 17.6M/41.5M [00:03&lt;00:02, 8.42MB/s]
- 46%|####6     | 19.1M/41.5M [00:03&lt;00:02, 8.46MB/s]
- 50%|####9     | 20.6M/41.5M [00:03&lt;00:02, 8.49MB/s]
- 53%|#####3    | 22.1M/41.5M [00:03&lt;00:02, 8.52MB/s]
- 57%|#####6    | 23.6M/41.5M [00:04&lt;00:02, 8.52MB/s]
- 61%|######    | 25.1M/41.5M [00:04&lt;00:02, 8.54MB/s]
- 64%|######4   | 26.6M/41.5M [00:04&lt;00:01, 8.56MB/s]
- 68%|######7   | 28.1M/41.5M [00:04&lt;00:01, 8.55MB/s]
- 71%|#######1  | 29.6M/41.5M [00:04&lt;00:01, 8.56MB/s]
- 75%|#######5  | 31.2M/41.5M [00:04&lt;00:01, 8.56MB/s]
- 79%|#######8  | 32.7M/41.5M [00:05&lt;00:01, 8.57MB/s]
- 82%|########2 | 34.2M/41.5M [00:05&lt;00:00, 8.56MB/s]
- 86%|########5 | 35.7M/41.5M [00:05&lt;00:00, 8.55MB/s]
- 90%|########9 | 37.2M/41.5M [00:05&lt;00:00, 8.56MB/s]
- 93%|#########3| 38.7M/41.5M [00:05&lt;00:00, 8.56MB/s]
- 97%|#########6| 40.2M/41.5M [00:06&lt;00:00, 8.55MB/s]
-100%|##########| 41.5M/41.5M [00:06&lt;00:00, 7.13MB/s]
+  0%|          | 16.0k/41.5M [00:00&lt;07:24, 97.9kB/s]
+  0%|          | 48.0k/41.5M [00:00&lt;04:40, 155kB/s]
+  0%|          | 64.0k/41.5M [00:00&lt;05:37, 129kB/s]
+  0%|          | 152k/41.5M [00:00&lt;02:29, 289kB/s]
+  0%|          | 200k/41.5M [00:00&lt;02:29, 290kB/s]
+  1%|          | 256k/41.5M [00:01&lt;02:20, 308kB/s]
+  1%|          | 288k/41.5M [00:01&lt;02:39, 270kB/s]
+  1%|          | 320k/41.5M [00:01&lt;02:55, 246kB/s]
+  1%|          | 352k/41.5M [00:01&lt;03:07, 230kB/s]
+  1%|          | 376k/41.5M [00:01&lt;03:31, 204kB/s]
+  1%|          | 400k/41.5M [00:01&lt;03:51, 186kB/s]
+  1%|          | 424k/41.5M [00:02&lt;04:07, 174kB/s]
+  1%|1         | 448k/41.5M [00:02&lt;05:39, 127kB/s]
+  1%|1         | 464k/41.5M [00:02&lt;05:59, 120kB/s]
+  1%|1         | 496k/41.5M [00:02&lt;05:08, 139kB/s]
+  1%|1         | 512k/41.5M [00:03&lt;07:07, 100kB/s]
+  1%|1         | 536k/41.5M [00:03&lt;06:26, 111kB/s]
+  1%|1         | 552k/41.5M [00:03&lt;06:38, 108kB/s]
+  1%|1         | 568k/41.5M [00:03&lt;06:49, 105kB/s]
+  1%|1         | 584k/41.5M [00:03&lt;08:51, 80.7kB/s]
+  1%|1         | 600k/41.5M [00:04&lt;08:26, 84.6kB/s]
+  1%|1         | 616k/41.5M [00:04&lt;08:08, 87.8kB/s]
+  1%|1         | 632k/41.5M [00:04&lt;09:59, 71.5kB/s]
+  2%|1         | 648k/41.5M [00:04&lt;11:20, 62.9kB/s]
+  2%|1         | 664k/41.5M [00:05&lt;11:49, 60.4kB/s]
+  2%|1         | 672k/41.5M [00:05&lt;12:18, 58.0kB/s]
+  2%|1         | 680k/41.5M [00:05&lt;12:46, 55.9kB/s]
+  2%|1         | 696k/41.5M [00:05&lt;11:25, 62.5kB/s]
+  2%|1         | 704k/41.5M [00:05&lt;12:04, 59.0kB/s]
+  2%|1         | 712k/41.5M [00:06&lt;11:53, 60.0kB/s]
+  2%|1         | 728k/41.5M [00:06&lt;10:45, 66.2kB/s]
+  2%|1         | 736k/41.5M [00:06&lt;11:36, 61.4kB/s]
+  2%|1         | 744k/41.5M [00:06&lt;11:31, 61.8kB/s]
+  2%|1         | 760k/41.5M [00:06&lt;10:29, 67.8kB/s]
+  2%|1         | 768k/41.5M [00:06&lt;10:40, 66.7kB/s]
+  2%|1         | 776k/41.5M [00:07&lt;12:27, 57.2kB/s]
+  2%|1         | 800k/41.5M [00:07&lt;08:35, 82.9kB/s]
+  2%|1         | 816k/41.5M [00:07&lt;08:09, 87.1kB/s]
+  2%|1         | 832k/41.5M [00:07&lt;10:12, 69.6kB/s]
+  2%|1         | 848k/41.5M [00:07&lt;09:18, 76.4kB/s]
+  2%|2         | 864k/41.5M [00:08&lt;10:55, 65.0kB/s]
+  2%|2         | 888k/41.5M [00:08&lt;08:30, 83.4kB/s]
+  2%|2         | 904k/41.5M [00:08&lt;10:10, 69.7kB/s]
+  2%|2         | 920k/41.5M [00:09&lt;11:25, 62.1kB/s]
+  2%|2         | 944k/41.5M [00:09&lt;08:56, 79.2kB/s]
+  2%|2         | 960k/41.5M [00:09&lt;08:30, 83.3kB/s]
+  2%|2         | 976k/41.5M [00:09&lt;08:10, 86.7kB/s]
+  2%|2         | 992k/41.5M [00:09&lt;09:57, 71.1kB/s]
+  2%|2         | 0.99M/41.5M [00:10&lt;08:02, 88.0kB/s]
+  2%|2         | 1.01M/41.5M [00:10&lt;09:45, 72.6kB/s]
+  2%|2         | 1.02M/41.5M [00:10&lt;09:35, 73.7kB/s]
+  3%|2         | 1.04M/41.5M [00:10&lt;08:56, 79.1kB/s]
+  3%|2         | 1.05M/41.5M [00:11&lt;09:59, 70.8kB/s]
+  3%|2         | 1.06M/41.5M [00:11&lt;10:45, 65.7kB/s]
+  3%|2         | 1.08M/41.5M [00:11&lt;09:36, 73.5kB/s]
+  3%|2         | 1.09M/41.5M [00:11&lt;08:51, 79.6kB/s]
+  3%|2         | 1.11M/41.5M [00:11&lt;08:56, 78.9kB/s]
+  3%|2         | 1.12M/41.5M [00:11&lt;09:16, 76.1kB/s]
+  3%|2         | 1.14M/41.5M [00:12&lt;07:52, 89.6kB/s]
+  3%|2         | 1.16M/41.5M [00:12&lt;07:40, 91.8kB/s]
+  3%|2         | 1.17M/41.5M [00:12&lt;07:33, 93.2kB/s]
+  3%|2         | 1.19M/41.5M [00:12&lt;10:10, 69.3kB/s]
+  3%|2         | 1.21M/41.5M [00:13&lt;08:47, 80.1kB/s]
+  3%|2         | 1.23M/41.5M [00:13&lt;11:02, 63.7kB/s]
+  3%|2         | 1.24M/41.5M [00:13&lt;12:00, 58.6kB/s]
+  3%|3         | 1.26M/41.5M [00:14&lt;12:42, 55.4kB/s]
+  3%|3         | 1.27M/41.5M [00:14&lt;17:52, 39.3kB/s]
+  3%|3         | 1.28M/41.5M [00:15&lt;19:06, 36.8kB/s]
+  3%|3         | 1.30M/41.5M [00:15&lt;17:37, 39.9kB/s]
+  3%|3         | 1.30M/41.5M [00:15&lt;17:01, 41.2kB/s]
+  3%|3         | 1.31M/41.5M [00:15&lt;16:29, 42.6kB/s]
+  3%|3         | 1.32M/41.5M [00:16&lt;16:00, 43.8kB/s]
+  3%|3         | 1.33M/41.5M [00:16&lt;15:37, 44.9kB/s]
+  3%|3         | 1.34M/41.5M [00:16&lt;15:18, 45.9kB/s]
+  3%|3         | 1.34M/41.5M [00:16&lt;15:03, 46.6kB/s]
+  3%|3         | 1.35M/41.5M [00:16&lt;14:52, 47.1kB/s]
+  3%|3         | 1.36M/41.5M [00:16&lt;14:44, 47.6kB/s]
+  3%|3         | 1.38M/41.5M [00:17&lt;11:21, 61.7kB/s]
+  3%|3         | 1.38M/41.5M [00:17&lt;12:05, 58.0kB/s]
+  3%|3         | 1.40M/41.5M [00:17&lt;10:05, 69.4kB/s]
+  3%|3         | 1.41M/41.5M [00:17&lt;14:18, 48.9kB/s]
+  3%|3         | 1.42M/41.5M [00:17&lt;11:30, 60.8kB/s]
+  3%|3         | 1.44M/41.5M [00:18&lt;09:57, 70.2kB/s]
+  4%|3         | 1.45M/41.5M [00:18&lt;09:02, 77.4kB/s]
+  4%|3         | 1.47M/41.5M [00:18&lt;08:25, 83.0kB/s]
+  4%|3         | 1.48M/41.5M [00:18&lt;10:18, 67.9kB/s]
+  4%|3         | 1.50M/41.5M [00:18&lt;09:19, 75.0kB/s]
+  4%|3         | 1.52M/41.5M [00:19&lt;08:39, 80.7kB/s]
+  4%|3         | 1.53M/41.5M [00:19&lt;08:12, 85.0kB/s]
+  4%|3         | 1.55M/41.5M [00:19&lt;08:27, 82.6kB/s]
+  4%|3         | 1.56M/41.5M [00:19&lt;09:40, 72.2kB/s]
+  4%|3         | 1.58M/41.5M [00:19&lt;08:55, 78.2kB/s]
+  4%|3         | 1.59M/41.5M [00:20&lt;09:28, 73.6kB/s]
+  4%|3         | 1.61M/41.5M [00:20&lt;08:47, 79.3kB/s]
+  4%|3         | 1.62M/41.5M [00:20&lt;09:21, 74.5kB/s]
+  4%|3         | 1.63M/41.5M [00:20&lt;12:45, 54.6kB/s]
+  4%|3         | 1.66M/41.5M [00:21&lt;10:21, 67.2kB/s]
+  4%|4         | 1.66M/41.5M [00:21&lt;11:01, 63.1kB/s]
+  4%|4         | 1.67M/41.5M [00:21&lt;11:39, 59.6kB/s]
+  4%|4         | 1.68M/41.5M [00:21&lt;11:34, 60.1kB/s]
+  4%|4         | 1.70M/41.5M [00:21&lt;10:27, 66.5kB/s]
+  4%|4         | 1.70M/41.5M [00:21&lt;10:37, 65.5kB/s]
+  4%|4         | 1.71M/41.5M [00:22&lt;12:12, 57.0kB/s]
+  4%|4         | 1.73M/41.5M [00:22&lt;10:09, 68.4kB/s]
+  4%|4         | 1.73M/41.5M [00:22&lt;11:04, 62.7kB/s]
+  4%|4         | 1.74M/41.5M [00:22&lt;11:51, 58.6kB/s]
+  4%|4         | 1.75M/41.5M [00:22&lt;12:28, 55.7kB/s]
+  4%|4         | 1.77M/41.5M [00:23&lt;10:12, 68.0kB/s]
+  4%|4         | 1.77M/41.5M [00:23&lt;11:08, 62.3kB/s]
+  4%|4         | 1.79M/41.5M [00:23&lt;09:32, 72.7kB/s]
+  4%|4         | 1.80M/41.5M [00:23&lt;13:45, 50.4kB/s]
+  4%|4         | 1.81M/41.5M [00:23&lt;11:10, 62.0kB/s]
+  4%|4         | 1.83M/41.5M [00:24&lt;09:44, 71.1kB/s]
+  4%|4         | 1.84M/41.5M [00:24&lt;13:32, 51.2kB/s]
+  4%|4         | 1.86M/41.5M [00:24&lt;09:26, 73.4kB/s]
+  5%|4         | 1.88M/41.5M [00:25&lt;13:05, 52.9kB/s]
+  5%|4         | 1.89M/41.5M [00:25&lt;11:16, 61.4kB/s]
+  5%|4         | 1.90M/41.5M [00:25&lt;11:47, 58.7kB/s]
+  5%|4         | 1.91M/41.5M [00:25&lt;12:17, 56.3kB/s]
+  5%|4         | 1.91M/41.5M [00:25&lt;12:42, 54.4kB/s]
+  5%|4         | 1.93M/41.5M [00:25&lt;11:14, 61.5kB/s]
+  5%|4         | 1.94M/41.5M [00:26&lt;11:52, 58.2kB/s]
+  5%|4         | 1.95M/41.5M [00:26&lt;10:02, 68.8kB/s]
+  5%|4         | 1.96M/41.5M [00:26&lt;13:59, 49.3kB/s]
+  5%|4         | 1.98M/41.5M [00:26&lt;09:33, 72.2kB/s]
+  5%|4         | 2.00M/41.5M [00:27&lt;10:25, 66.2kB/s]
+  5%|4         | 2.02M/41.5M [00:27&lt;09:23, 73.5kB/s]
+  5%|4         | 2.02M/41.5M [00:27&lt;10:56, 63.0kB/s]
+  5%|4         | 2.04M/41.5M [00:27&lt;12:02, 57.3kB/s]
+  5%|4         | 2.05M/41.5M [00:28&lt;10:58, 62.8kB/s]
+  5%|4         | 2.06M/41.5M [00:28&lt;11:34, 59.5kB/s]
+  5%|4         | 2.07M/41.5M [00:28&lt;12:07, 56.8kB/s]
+  5%|5         | 2.08M/41.5M [00:28&lt;12:35, 54.7kB/s]
+  5%|5         | 2.09M/41.5M [00:28&lt;12:58, 53.1kB/s]
+  5%|5         | 2.09M/41.5M [00:28&lt;12:48, 53.8kB/s]
+  5%|5         | 2.10M/41.5M [00:29&lt;17:02, 40.4kB/s]
+  5%|5         | 2.11M/41.5M [00:29&lt;16:13, 42.4kB/s]
+  5%|5         | 2.12M/41.5M [00:29&lt;16:08, 42.6kB/s]
+  5%|5         | 2.12M/41.5M [00:29&lt;15:03, 45.7kB/s]
+  5%|5         | 2.13M/41.5M [00:29&lt;15:17, 45.0kB/s]
+  5%|5         | 2.14M/41.5M [00:30&lt;18:36, 36.9kB/s]
+  5%|5         | 2.15M/41.5M [00:30&lt;17:16, 39.8kB/s]
+  5%|5         | 2.16M/41.5M [00:30&lt;21:02, 32.7kB/s]
+  5%|5         | 2.17M/41.5M [00:31&lt;20:44, 33.1kB/s]
+  5%|5         | 2.18M/41.5M [00:31&lt;19:05, 36.0kB/s]
+  5%|5         | 2.19M/41.5M [00:31&lt;17:47, 38.6kB/s]
+  5%|5         | 2.20M/41.5M [00:31&lt;20:37, 33.3kB/s]
+  5%|5         | 2.20M/41.5M [00:32&lt;18:48, 36.5kB/s]
+  5%|5         | 2.21M/41.5M [00:32&lt;17:27, 39.3kB/s]
+  5%|5         | 2.22M/41.5M [00:32&lt;16:29, 41.6kB/s]
+  5%|5         | 2.23M/41.5M [00:32&lt;15:47, 43.5kB/s]
+  5%|5         | 2.24M/41.5M [00:32&lt;11:48, 58.1kB/s]
+  5%|5         | 2.25M/41.5M [00:33&lt;15:49, 43.3kB/s]
+  5%|5         | 2.27M/41.5M [00:33&lt;12:12, 56.2kB/s]
+  5%|5         | 2.27M/41.5M [00:33&lt;12:37, 54.3kB/s]
+  5%|5         | 2.28M/41.5M [00:33&lt;12:58, 52.8kB/s]
+  6%|5         | 2.29M/41.5M [00:33&lt;13:15, 51.6kB/s]
+  6%|5         | 2.30M/41.5M [00:33&lt;10:36, 64.5kB/s]
+  6%|5         | 2.31M/41.5M [00:34&lt;11:24, 60.0kB/s]
+  6%|5         | 2.32M/41.5M [00:34&lt;12:04, 56.7kB/s]
+  6%|5         | 2.34M/41.5M [00:34&lt;09:58, 68.6kB/s]
+  6%|5         | 2.34M/41.5M [00:34&lt;10:54, 62.7kB/s]
+  6%|5         | 2.35M/41.5M [00:34&lt;11:40, 58.6kB/s]
+  6%|5         | 2.37M/41.5M [00:34&lt;09:45, 70.1kB/s]
+  6%|5         | 2.38M/41.5M [00:35&lt;08:44, 78.2kB/s]
+  6%|5         | 2.40M/41.5M [00:35&lt;08:08, 83.9kB/s]
+  6%|5         | 2.41M/41.5M [00:35&lt;07:45, 88.0kB/s]
+  6%|5         | 2.43M/41.5M [00:35&lt;07:31, 90.8kB/s]
+  6%|5         | 2.45M/41.5M [00:35&lt;07:21, 92.7kB/s]
+  6%|5         | 2.47M/41.5M [00:36&lt;08:09, 83.6kB/s]
+  6%|6         | 2.49M/41.5M [00:36&lt;06:52, 99.0kB/s]
+  6%|6         | 2.51M/41.5M [00:36&lt;08:43, 78.1kB/s]
+  6%|6         | 2.53M/41.5M [00:36&lt;07:16, 93.6kB/s]
+  6%|6         | 2.55M/41.5M [00:36&lt;07:11, 94.5kB/s]
+  6%|6         | 2.56M/41.5M [00:37&lt;07:08, 95.3kB/s]
+  6%|6         | 2.58M/41.5M [00:37&lt;07:33, 90.1kB/s]
+  6%|6         | 2.59M/41.5M [00:37&lt;07:23, 92.0kB/s]
+  6%|6         | 2.61M/41.5M [00:37&lt;08:48, 77.2kB/s]
+  6%|6         | 2.62M/41.5M [00:37&lt;08:44, 77.6kB/s]
+  6%|6         | 2.63M/41.5M [00:38&lt;09:38, 70.4kB/s]
+  6%|6         | 2.65M/41.5M [00:38&lt;08:16, 82.1kB/s]
+  6%|6         | 2.66M/41.5M [00:38&lt;08:19, 81.5kB/s]
+  6%|6         | 2.68M/41.5M [00:38&lt;10:05, 67.2kB/s]
+  6%|6         | 2.70M/41.5M [00:38&lt;09:06, 74.4kB/s]
+  7%|6         | 2.70M/41.5M [00:39&lt;09:27, 71.6kB/s]
+  7%|6         | 2.71M/41.5M [00:39&lt;11:42, 57.9kB/s]
+  7%|6         | 2.73M/41.5M [00:39&lt;11:20, 59.8kB/s]
+  7%|6         | 2.73M/41.5M [00:39&lt;11:52, 57.0kB/s]
+  7%|6         | 2.74M/41.5M [00:39&lt;12:20, 54.9kB/s]
+  7%|6         | 2.76M/41.5M [00:40&lt;12:57, 52.2kB/s]
+  7%|6         | 2.77M/41.5M [00:40&lt;13:10, 51.4kB/s]
+  7%|6         | 2.78M/41.5M [00:40&lt;10:45, 62.9kB/s]
+  7%|6         | 2.79M/41.5M [00:40&lt;11:26, 59.1kB/s]
+  7%|6         | 2.80M/41.5M [00:40&lt;09:43, 69.6kB/s]
+  7%|6         | 2.81M/41.5M [00:41&lt;13:35, 49.7kB/s]
+  7%|6         | 2.83M/41.5M [00:41&lt;11:03, 61.1kB/s]
+  7%|6         | 2.84M/41.5M [00:41&lt;11:39, 57.9kB/s]
+  7%|6         | 2.85M/41.5M [00:41&lt;09:52, 68.4kB/s]
+  7%|6         | 2.86M/41.5M [00:41&lt;10:44, 62.9kB/s]
+  7%|6         | 2.88M/41.5M [00:42&lt;09:17, 72.6kB/s]
+  7%|6         | 2.88M/41.5M [00:42&lt;10:15, 65.7kB/s]
+  7%|6         | 2.89M/41.5M [00:42&lt;11:06, 60.8kB/s]
+  7%|7         | 2.91M/41.5M [00:42&lt;09:26, 71.5kB/s]
+  7%|7         | 2.92M/41.5M [00:42&lt;08:31, 79.1kB/s]
+  7%|7         | 2.93M/41.5M [00:42&lt;09:36, 70.1kB/s]
+  7%|7         | 2.95M/41.5M [00:43&lt;08:37, 78.2kB/s]
+  7%|7         | 2.96M/41.5M [00:43&lt;10:25, 64.6kB/s]
+  7%|7         | 2.98M/41.5M [00:43&lt;08:24, 80.0kB/s]
+  7%|7         | 3.00M/41.5M [00:43&lt;07:58, 84.3kB/s]
+  7%|7         | 3.02M/41.5M [00:44&lt;07:39, 87.7kB/s]
+  7%|7         | 3.03M/41.5M [00:44&lt;09:28, 70.9kB/s]
+  7%|7         | 3.05M/41.5M [00:44&lt;08:42, 77.1kB/s]
+  7%|7         | 3.06M/41.5M [00:44&lt;08:10, 82.2kB/s]
+  7%|7         | 3.08M/41.5M [00:44&lt;07:47, 86.2kB/s]
+  7%|7         | 3.09M/41.5M [00:45&lt;07:31, 89.2kB/s]
+  7%|7         | 3.11M/41.5M [00:45&lt;07:19, 91.5kB/s]
+  8%|7         | 3.12M/41.5M [00:45&lt;07:11, 93.2kB/s]
+  8%|7         | 3.14M/41.5M [00:45&lt;07:06, 94.4kB/s]
+  8%|7         | 3.16M/41.5M [00:45&lt;07:01, 95.3kB/s]
+  8%|7         | 3.17M/41.5M [00:45&lt;06:59, 95.9kB/s]
+  8%|7         | 3.19M/41.5M [00:46&lt;06:57, 96.3kB/s]
+  8%|7         | 3.20M/41.5M [00:46&lt;06:55, 96.6kB/s]
+  8%|7         | 3.22M/41.5M [00:46&lt;06:54, 96.8kB/s]
+  8%|7         | 3.24M/41.5M [00:46&lt;05:59, 111kB/s]
+  8%|7         | 3.26M/41.5M [00:46&lt;08:05, 82.6kB/s]
+  8%|7         | 3.29M/41.5M [00:47&lt;07:33, 88.3kB/s]
+  8%|7         | 3.30M/41.5M [00:47&lt;07:47, 85.6kB/s]
+  8%|8         | 3.32M/41.5M [00:47&lt;08:05, 82.4kB/s]
+  8%|8         | 3.34M/41.5M [00:47&lt;09:35, 69.6kB/s]
+  8%|8         | 3.34M/41.5M [00:48&lt;10:14, 65.1kB/s]
+  8%|8         | 3.35M/41.5M [00:48&lt;10:52, 61.2kB/s]
+  8%|8         | 3.36M/41.5M [00:48&lt;11:28, 58.1kB/s]
+  8%|8         | 3.37M/41.5M [00:48&lt;10:59, 60.6kB/s]
+  8%|8         | 3.38M/41.5M [00:48&lt;10:11, 65.3kB/s]
+  8%|8         | 3.39M/41.5M [00:49&lt;10:57, 60.7kB/s]
+  8%|8         | 3.41M/41.5M [00:49&lt;09:21, 71.1kB/s]
+  8%|8         | 3.41M/41.5M [00:49&lt;10:17, 64.6kB/s]
+  8%|8         | 3.43M/41.5M [00:49&lt;08:58, 74.1kB/s]
+  8%|8         | 3.45M/41.5M [00:49&lt;08:12, 80.9kB/s]
+  8%|8         | 3.45M/41.5M [00:49&lt;09:18, 71.4kB/s]
+  8%|8         | 3.47M/41.5M [00:50&lt;08:24, 79.0kB/s]
+  8%|8         | 3.48M/41.5M [00:50&lt;07:51, 84.5kB/s]
+  8%|8         | 3.51M/41.5M [00:50&lt;06:27, 103kB/s]
+  8%|8         | 3.52M/41.5M [00:50&lt;06:33, 101kB/s]
+  9%|8         | 3.54M/41.5M [00:50&lt;06:38, 100kB/s]
+  9%|8         | 3.55M/41.5M [00:50&lt;06:42, 98.8kB/s]
+  9%|8         | 3.57M/41.5M [00:51&lt;06:43, 98.6kB/s]
+  9%|8         | 3.59M/41.5M [00:51&lt;06:45, 98.0kB/s]
+  9%|8         | 3.60M/41.5M [00:51&lt;08:47, 75.4kB/s]
+  9%|8         | 3.62M/41.5M [00:51&lt;08:12, 80.7kB/s]
+  9%|8         | 3.63M/41.5M [00:52&lt;09:48, 67.5kB/s]
+  9%|8         | 3.65M/41.5M [00:52&lt;08:53, 74.3kB/s]
+  9%|8         | 3.66M/41.5M [00:52&lt;08:16, 79.9kB/s]
+  9%|8         | 3.68M/41.5M [00:52&lt;07:50, 84.3kB/s]
+  9%|8         | 3.70M/41.5M [00:52&lt;09:33, 69.1kB/s]
+  9%|8         | 3.70M/41.5M [00:53&lt;12:40, 52.1kB/s]
+  9%|8         | 3.71M/41.5M [00:53&lt;19:33, 33.7kB/s]
+  9%|9         | 3.74M/41.5M [00:53&lt;10:54, 60.5kB/s]
+  9%|9         | 3.76M/41.5M [00:54&lt;09:45, 67.6kB/s]
+  9%|9         | 3.77M/41.5M [00:54&lt;10:49, 60.9kB/s]
+  9%|9         | 3.79M/41.5M [00:54&lt;09:39, 68.2kB/s]
+  9%|9         | 3.80M/41.5M [00:54&lt;08:48, 74.7kB/s]
+  9%|9         | 3.82M/41.5M [00:55&lt;10:11, 64.5kB/s]
+  9%|9         | 3.84M/41.5M [00:55&lt;07:59, 82.3kB/s]
+  9%|9         | 3.86M/41.5M [00:55&lt;07:39, 85.8kB/s]
+  9%|9         | 3.88M/41.5M [00:56&lt;17:09, 38.3kB/s]
+  9%|9         | 3.92M/41.5M [00:56&lt;08:50, 74.3kB/s]
+  9%|9         | 3.94M/41.5M [00:57&lt;09:47, 67.1kB/s]
+ 10%|9         | 3.95M/41.5M [00:57&lt;10:37, 61.7kB/s]
+ 10%|9         | 3.98M/41.5M [00:57&lt;08:36, 76.2kB/s]
+ 10%|9         | 3.99M/41.5M [00:57&lt;09:45, 67.1kB/s]
+ 10%|9         | 4.01M/41.5M [00:58&lt;11:06, 59.0kB/s]
+ 10%|9         | 4.02M/41.5M [00:58&lt;10:57, 59.7kB/s]
+ 10%|9         | 4.02M/41.5M [00:58&lt;11:59, 54.6kB/s]
+ 10%|9         | 4.03M/41.5M [00:58&lt;11:37, 56.3kB/s]
+ 10%|9         | 4.05M/41.5M [00:58&lt;10:25, 62.8kB/s]
+ 10%|9         | 4.05M/41.5M [00:59&lt;11:02, 59.2kB/s]
+ 10%|9         | 4.06M/41.5M [00:59&lt;10:50, 60.3kB/s]
+ 10%|9         | 4.08M/41.5M [00:59&lt;09:51, 66.3kB/s]
+ 10%|9         | 4.09M/41.5M [00:59&lt;11:08, 58.6kB/s]
+ 10%|9         | 4.11M/41.5M [00:59&lt;09:05, 71.9kB/s]
+ 10%|9         | 4.12M/41.5M [01:00&lt;08:50, 73.9kB/s]
+ 10%|9         | 4.13M/41.5M [01:00&lt;11:37, 56.1kB/s]
+ 10%|#         | 4.16M/41.5M [01:00&lt;08:54, 73.2kB/s]
+ 10%|#         | 4.16M/41.5M [01:00&lt;09:42, 67.2kB/s]
+ 10%|#         | 4.17M/41.5M [01:01&lt;13:08, 49.7kB/s]
+ 10%|#         | 4.20M/41.5M [01:01&lt;09:07, 71.4kB/s]
+ 10%|#         | 4.21M/41.5M [01:01&lt;10:25, 62.5kB/s]
+ 10%|#         | 4.22M/41.5M [01:01&lt;10:22, 62.8kB/s]
+ 10%|#         | 4.23M/41.5M [01:01&lt;09:40, 67.3kB/s]
+ 10%|#         | 4.24M/41.5M [01:02&lt;10:23, 62.6kB/s]
+ 10%|#         | 4.25M/41.5M [01:02&lt;10:19, 63.0kB/s]
+ 10%|#         | 4.27M/41.5M [01:02&lt;09:33, 68.1kB/s]
+ 10%|#         | 4.27M/41.5M [01:02&lt;09:40, 67.3kB/s]
+ 10%|#         | 4.29M/41.5M [01:02&lt;08:31, 76.2kB/s]
+ 10%|#         | 4.30M/41.5M [01:02&lt;07:52, 82.5kB/s]
+ 10%|#         | 4.32M/41.5M [01:03&lt;07:28, 87.0kB/s]
+ 10%|#         | 4.34M/41.5M [01:03&lt;07:12, 90.1kB/s]
+ 10%|#         | 4.35M/41.5M [01:03&lt;07:02, 92.2kB/s]
+ 11%|#         | 4.37M/41.5M [01:03&lt;06:55, 93.8kB/s]
+ 11%|#         | 4.38M/41.5M [01:03&lt;06:50, 94.8kB/s]
+ 11%|#         | 4.40M/41.5M [01:03&lt;06:46, 95.6kB/s]
+ 11%|#         | 4.41M/41.5M [01:04&lt;06:44, 96.1kB/s]
+ 11%|#         | 4.44M/41.5M [01:04&lt;05:49, 111kB/s]
+ 11%|#         | 4.45M/41.5M [01:04&lt;06:03, 107kB/s]
+ 11%|#         | 4.48M/41.5M [01:04&lt;05:27, 119kB/s]
+ 11%|#         | 4.50M/41.5M [01:04&lt;05:05, 127kB/s]
+ 11%|#         | 4.52M/41.5M [01:04&lt;04:52, 132kB/s]
+ 11%|#         | 4.55M/41.5M [01:05&lt;06:08, 105kB/s]
+ 11%|#1        | 4.58M/41.5M [01:05&lt;05:04, 127kB/s]
+ 11%|#1        | 4.59M/41.5M [01:05&lt;06:48, 94.7kB/s]
+ 11%|#1        | 4.62M/41.5M [01:06&lt;06:44, 95.7kB/s]
+ 11%|#1        | 4.65M/41.5M [01:06&lt;06:04, 106kB/s]
+ 11%|#1        | 4.66M/41.5M [01:06&lt;06:32, 98.3kB/s]
+ 11%|#1        | 4.68M/41.5M [01:06&lt;07:43, 83.3kB/s]
+ 11%|#1        | 4.70M/41.5M [01:06&lt;07:51, 81.9kB/s]
+ 11%|#1        | 4.71M/41.5M [01:07&lt;09:16, 69.3kB/s]
+ 11%|#1        | 4.73M/41.5M [01:07&lt;08:32, 75.3kB/s]
+ 11%|#1        | 4.74M/41.5M [01:07&lt;09:23, 68.4kB/s]
+ 11%|#1        | 4.76M/41.5M [01:08&lt;10:28, 61.3kB/s]
+ 11%|#1        | 4.77M/41.5M [01:08&lt;10:56, 58.7kB/s]
+ 12%|#1        | 4.77M/41.5M [01:08&lt;12:00, 53.5kB/s]
+ 12%|#1        | 4.79M/41.5M [01:08&lt;11:51, 54.1kB/s]
+ 12%|#1        | 4.80M/41.5M [01:08&lt;12:07, 52.9kB/s]
+ 12%|#1        | 4.80M/41.5M [01:09&lt;12:21, 51.9kB/s]
+ 12%|#1        | 4.81M/41.5M [01:09&lt;12:33, 51.0kB/s]
+ 12%|#1        | 4.82M/41.5M [01:09&lt;12:42, 50.4kB/s]
+ 12%|#1        | 4.83M/41.5M [01:09&lt;12:50, 49.9kB/s]
+ 12%|#1        | 4.84M/41.5M [01:09&lt;12:55, 49.6kB/s]
+ 12%|#1        | 4.84M/41.5M [01:10&lt;16:44, 38.2kB/s]
+ 12%|#1        | 4.86M/41.5M [01:10&lt;15:08, 42.3kB/s]
+ 12%|#1        | 4.87M/41.5M [01:10&lt;21:02, 30.4kB/s]
+ 12%|#1        | 4.88M/41.5M [01:11&lt;15:48, 40.5kB/s]
+ 12%|#1        | 4.89M/41.5M [01:11&lt;17:24, 36.8kB/s]
+ 12%|#1        | 4.90M/41.5M [01:11&lt;17:11, 37.2kB/s]
+ 12%|#1        | 4.91M/41.5M [01:11&lt;18:39, 34.3kB/s]
+ 12%|#1        | 4.92M/41.5M [01:12&lt;14:14, 44.9kB/s]
+ 12%|#1        | 4.93M/41.5M [01:12&lt;13:58, 45.7kB/s]
+ 12%|#1        | 4.94M/41.5M [01:12&lt;20:18, 31.4kB/s]
+ 12%|#1        | 4.95M/41.5M [01:13&lt;13:53, 46.0kB/s]
+ 12%|#1        | 4.96M/41.5M [01:13&lt;13:42, 46.5kB/s]
+ 12%|#1        | 4.97M/41.5M [01:13&lt;16:47, 38.0kB/s]
+ 12%|#2        | 4.98M/41.5M [01:13&lt;13:56, 45.7kB/s]
+ 12%|#2        | 4.99M/41.5M [01:14&lt;16:20, 39.0kB/s]
+ 12%|#2        | 5.00M/41.5M [01:14&lt;19:59, 31.9kB/s]
+ 12%|#2        | 5.02M/41.5M [01:14&lt;14:36, 43.6kB/s]
+ 12%|#2        | 5.02M/41.5M [01:14&lt;14:16, 44.7kB/s]
+ 12%|#2        | 5.03M/41.5M [01:14&lt;13:58, 45.6kB/s]
+ 12%|#2        | 5.04M/41.5M [01:15&lt;13:44, 46.3kB/s]
+ 12%|#2        | 5.05M/41.5M [01:15&lt;13:33, 47.0kB/s]
+ 12%|#2        | 5.05M/41.5M [01:15&lt;13:25, 47.4kB/s]
+ 12%|#2        | 5.06M/41.5M [01:15&lt;13:19, 47.8kB/s]
+ 12%|#2        | 5.08M/41.5M [01:15&lt;11:04, 57.5kB/s]
+ 12%|#2        | 5.09M/41.5M [01:16&lt;10:42, 59.5kB/s]
+ 12%|#2        | 5.09M/41.5M [01:16&lt;11:18, 56.3kB/s]
+ 12%|#2        | 5.11M/41.5M [01:16&lt;12:02, 52.8kB/s]
+ 12%|#2        | 5.12M/41.5M [01:16&lt;12:25, 51.1kB/s]
+ 12%|#2        | 5.14M/41.5M [01:17&lt;10:58, 57.9kB/s]
+ 12%|#2        | 5.15M/41.5M [01:17&lt;11:23, 55.8kB/s]
+ 12%|#2        | 5.16M/41.5M [01:17&lt;14:39, 43.3kB/s]
+ 12%|#2        | 5.17M/41.5M [01:17&lt;14:01, 45.2kB/s]
+ 13%|#2        | 5.19M/41.5M [01:18&lt;15:59, 39.7kB/s]
+ 13%|#2        | 5.20M/41.5M [01:18&lt;15:23, 41.2kB/s]
+ 13%|#2        | 5.20M/41.5M [01:19&lt;20:43, 30.6kB/s]
+ 13%|#2        | 5.22M/41.5M [01:19&lt;17:42, 35.8kB/s]
+ 13%|#2        | 5.23M/41.5M [01:19&lt;16:40, 38.0kB/s]
+ 13%|#2        | 5.23M/41.5M [01:19&lt;18:54, 33.5kB/s]
+ 13%|#2        | 5.24M/41.5M [01:20&lt;17:24, 36.4kB/s]
+ 13%|#2        | 5.25M/41.5M [01:20&lt;16:14, 39.0kB/s]
+ 13%|#2        | 5.26M/41.5M [01:20&lt;15:20, 41.3kB/s]
+ 13%|#2        | 5.27M/41.5M [01:20&lt;22:01, 28.7kB/s]
+ 13%|#2        | 5.28M/41.5M [01:21&lt;15:05, 41.9kB/s]
+ 13%|#2        | 5.29M/41.5M [01:21&lt;14:34, 43.4kB/s]
+ 13%|#2        | 5.30M/41.5M [01:21&lt;14:10, 44.6kB/s]
+ 13%|#2        | 5.30M/41.5M [01:21&lt;13:51, 45.6kB/s]
+ 13%|#2        | 5.31M/41.5M [01:21&lt;17:13, 36.7kB/s]
+ 13%|#2        | 5.32M/41.5M [01:22&lt;16:00, 39.5kB/s]
+ 13%|#2        | 5.33M/41.5M [01:22&lt;15:08, 41.7kB/s]
+ 13%|#2        | 5.34M/41.5M [01:22&lt;14:30, 43.5kB/s]
+ 13%|#2        | 5.34M/41.5M [01:22&lt;14:03, 44.9kB/s]
+ 13%|#2        | 5.35M/41.5M [01:22&lt;13:44, 46.0kB/s]
+ 13%|#2        | 5.36M/41.5M [01:22&lt;13:30, 46.7kB/s]
+ 13%|#2        | 5.37M/41.5M [01:23&lt;13:20, 47.3kB/s]
+ 13%|#2        | 5.38M/41.5M [01:23&lt;13:13, 47.7kB/s]
+ 13%|#2        | 5.39M/41.5M [01:23&lt;10:07, 62.3kB/s]
+ 13%|#3        | 5.40M/41.5M [01:23&lt;10:49, 58.3kB/s]
+ 13%|#3        | 5.41M/41.5M [01:23&lt;11:22, 55.4kB/s]
+ 13%|#3        | 5.42M/41.5M [01:24&lt;12:02, 52.3kB/s]
+ 13%|#3        | 5.44M/41.5M [01:24&lt;09:56, 63.4kB/s]
+ 13%|#3        | 5.45M/41.5M [01:24&lt;09:16, 67.9kB/s]
+ 13%|#3        | 5.46M/41.5M [01:24&lt;09:22, 67.2kB/s]
+ 13%|#3        | 5.48M/41.5M [01:24&lt;10:39, 59.0kB/s]
+ 13%|#3        | 5.49M/41.5M [01:25&lt;09:13, 68.2kB/s]
+ 13%|#3        | 5.51M/41.5M [01:25&lt;08:50, 71.1kB/s]
+ 13%|#3        | 5.52M/41.5M [01:25&lt;09:00, 69.8kB/s]
+ 13%|#3        | 5.53M/41.5M [01:25&lt;08:40, 72.5kB/s]
+ 13%|#3        | 5.55M/41.5M [01:25&lt;07:55, 79.2kB/s]
+ 13%|#3        | 5.55M/41.5M [01:26&lt;08:53, 70.7kB/s]
+ 13%|#3        | 5.57M/41.5M [01:26&lt;10:18, 60.9kB/s]
+ 13%|#3        | 5.59M/41.5M [01:26&lt;09:31, 65.9kB/s]
+ 14%|#3        | 5.60M/41.5M [01:26&lt;08:31, 73.6kB/s]
+ 14%|#3        | 5.61M/41.5M [01:26&lt;08:45, 71.6kB/s]
+ 14%|#3        | 5.62M/41.5M [01:27&lt;10:44, 58.4kB/s]
+ 14%|#3        | 5.64M/41.5M [01:27&lt;10:57, 57.2kB/s]
+ 14%|#3        | 5.65M/41.5M [01:27&lt;11:19, 55.3kB/s]
+ 14%|#3        | 5.66M/41.5M [01:27&lt;09:35, 65.3kB/s]
+ 14%|#3        | 5.67M/41.5M [01:28&lt;10:14, 61.1kB/s]
+ 14%|#3        | 5.68M/41.5M [01:28&lt;10:49, 57.8kB/s]
+ 14%|#3        | 5.69M/41.5M [01:28&lt;11:19, 55.3kB/s]
+ 14%|#3        | 5.70M/41.5M [01:28&lt;11:39, 53.7kB/s]
+ 14%|#3        | 5.70M/41.5M [01:28&lt;15:25, 40.5kB/s]
+ 14%|#3        | 5.73M/41.5M [01:29&lt;10:05, 61.9kB/s]
+ 14%|#3        | 5.73M/41.5M [01:29&lt;10:39, 58.7kB/s]
+ 14%|#3        | 5.74M/41.5M [01:29&lt;10:24, 60.0kB/s]
+ 14%|#3        | 5.75M/41.5M [01:29&lt;10:59, 56.8kB/s]
+ 14%|#3        | 5.76M/41.5M [01:29&lt;11:28, 54.4kB/s]
+ 14%|#3        | 5.77M/41.5M [01:29&lt;11:50, 52.8kB/s]
+ 14%|#3        | 5.77M/41.5M [01:30&lt;13:00, 48.0kB/s]
+ 14%|#3        | 5.78M/41.5M [01:30&lt;16:35, 37.6kB/s]
+ 14%|#3        | 5.79M/41.5M [01:30&lt;15:30, 40.2kB/s]
+ 14%|#3        | 5.80M/41.5M [01:30&lt;18:27, 33.8kB/s]
+ 14%|#4        | 5.81M/41.5M [01:31&lt;12:58, 48.1kB/s]
+ 14%|#4        | 5.82M/41.5M [01:31&lt;12:55, 48.2kB/s]
+ 14%|#4        | 5.83M/41.5M [01:31&lt;12:53, 48.3kB/s]
+ 14%|#4        | 5.84M/41.5M [01:31&lt;16:19, 38.2kB/s]
+ 14%|#4        | 5.85M/41.5M [01:31&lt;11:59, 51.9kB/s]
+ 14%|#4        | 5.86M/41.5M [01:32&lt;12:56, 48.1kB/s]
+ 14%|#4        | 5.87M/41.5M [01:32&lt;12:54, 48.2kB/s]
+ 14%|#4        | 5.88M/41.5M [01:32&lt;12:01, 51.8kB/s]
+ 14%|#4        | 5.88M/41.5M [01:32&lt;12:13, 50.9kB/s]
+ 14%|#4        | 5.89M/41.5M [01:32&lt;15:59, 38.9kB/s]
+ 14%|#4        | 5.91M/41.5M [01:33&lt;12:29, 49.8kB/s]
+ 14%|#4        | 5.93M/41.5M [01:33&lt;10:24, 59.7kB/s]
+ 14%|#4        | 5.94M/41.5M [01:33&lt;13:22, 46.5kB/s]
+ 14%|#4        | 5.95M/41.5M [01:34&lt;11:26, 54.3kB/s]
+ 14%|#4        | 5.96M/41.5M [01:34&lt;11:02, 56.2kB/s]
+ 14%|#4        | 5.97M/41.5M [01:34&lt;11:25, 54.3kB/s]
+ 14%|#4        | 5.98M/41.5M [01:34&lt;11:45, 52.8kB/s]
+ 14%|#4        | 5.98M/41.5M [01:34&lt;12:00, 51.7kB/s]
+ 14%|#4        | 6.00M/41.5M [01:34&lt;10:17, 60.2kB/s]
+ 14%|#4        | 6.01M/41.5M [01:35&lt;10:51, 57.1kB/s]
+ 14%|#4        | 6.02M/41.5M [01:35&lt;10:31, 58.9kB/s]
+ 15%|#4        | 6.03M/41.5M [01:35&lt;09:27, 65.5kB/s]
+ 15%|#4        | 6.04M/41.5M [01:35&lt;09:28, 65.4kB/s]
+ 15%|#4        | 6.05M/41.5M [01:35&lt;08:52, 69.8kB/s]
+ 15%|#4        | 6.06M/41.5M [01:35&lt;09:01, 68.7kB/s]
+ 15%|#4        | 6.08M/41.5M [01:36&lt;11:03, 56.0kB/s]
+ 15%|#4        | 6.10M/41.5M [01:36&lt;07:58, 77.5kB/s]
+ 15%|#4        | 6.12M/41.5M [01:36&lt;11:18, 54.7kB/s]
+ 15%|#4        | 6.14M/41.5M [01:37&lt;08:31, 72.5kB/s]
+ 15%|#4        | 6.16M/41.5M [01:37&lt;11:22, 54.3kB/s]
+ 15%|#4        | 6.17M/41.5M [01:37&lt;11:46, 52.4kB/s]
+ 15%|#4        | 6.18M/41.5M [01:38&lt;11:53, 51.9kB/s]
+ 15%|#4        | 6.19M/41.5M [01:38&lt;14:29, 42.6kB/s]
+ 15%|#4        | 6.20M/41.5M [01:38&lt;16:49, 36.7kB/s]
+ 15%|#4        | 6.20M/41.5M [01:38&lt;15:50, 38.9kB/s]
+ 15%|#4        | 6.21M/41.5M [01:39&lt;18:13, 33.8kB/s]
+ 15%|#4        | 6.22M/41.5M [01:39&lt;23:27, 26.3kB/s]
+ 15%|#5        | 6.23M/41.5M [01:40&lt;38:31, 16.0kB/s]
+ 15%|#5        | 6.23M/41.5M [01:41&lt;38:22, 16.1kB/s]
+ 15%|#5        | 6.24M/41.5M [01:42&lt;42:34, 14.5kB/s]
+ 15%|#5        | 6.25M/41.5M [01:42&lt;37:33, 16.4kB/s]
+ 15%|#5        | 6.26M/41.5M [01:43&lt;40:39, 15.1kB/s]
+ 15%|#5        | 6.27M/41.5M [01:43&lt;32:22, 19.0kB/s]
+ 15%|#5        | 6.27M/41.5M [01:43&lt;30:16, 20.3kB/s]
+ 15%|#5        | 6.28M/41.5M [01:43&lt;25:01, 24.6kB/s]
+ 15%|#5        | 6.29M/41.5M [01:44&lt;25:05, 24.5kB/s]
+ 15%|#5        | 6.30M/41.5M [01:44&lt;21:21, 28.8kB/s]
+ 15%|#5        | 6.30M/41.5M [01:44&lt;18:45, 32.8kB/s]
+ 15%|#5        | 6.31M/41.5M [01:44&lt;16:54, 36.3kB/s]
+ 15%|#5        | 6.32M/41.5M [01:44&lt;15:37, 39.3kB/s]
+ 15%|#5        | 6.34M/41.5M [01:44&lt;11:20, 54.1kB/s]
+ 15%|#5        | 6.34M/41.5M [01:45&lt;14:46, 41.6kB/s]
+ 15%|#5        | 6.36M/41.5M [01:45&lt;11:55, 51.5kB/s]
+ 15%|#5        | 6.37M/41.5M [01:45&lt;13:06, 46.8kB/s]
+ 15%|#5        | 6.38M/41.5M [01:45&lt;12:58, 47.3kB/s]
+ 15%|#5        | 6.39M/41.5M [01:46&lt;11:14, 54.6kB/s]
+ 15%|#5        | 6.40M/41.5M [01:46&lt;10:25, 58.8kB/s]
+ 15%|#5        | 6.41M/41.5M [01:46&lt;14:02, 43.6kB/s]
+ 15%|#5        | 6.42M/41.5M [01:46&lt;11:52, 51.6kB/s]
+ 15%|#5        | 6.43M/41.5M [01:46&lt;12:01, 50.9kB/s]
+ 16%|#5        | 6.45M/41.5M [01:47&lt;09:46, 62.7kB/s]
+ 16%|#5        | 6.45M/41.5M [01:47&lt;10:23, 59.0kB/s]
+ 16%|#5        | 6.46M/41.5M [01:47&lt;13:54, 44.0kB/s]
+ 16%|#5        | 6.48M/41.5M [01:47&lt;10:49, 56.5kB/s]
+ 16%|#5        | 6.48M/41.5M [01:47&lt;11:12, 54.6kB/s]
+ 16%|#5        | 6.49M/41.5M [01:48&lt;11:32, 53.0kB/s]
+ 16%|#5        | 6.51M/41.5M [01:48&lt;09:22, 65.2kB/s]
+ 16%|#5        | 6.52M/41.5M [01:48&lt;10:06, 60.5kB/s]
+ 16%|#5        | 6.52M/41.5M [01:48&lt;10:41, 57.1kB/s]
+ 16%|#5        | 6.54M/41.5M [01:48&lt;09:32, 64.0kB/s]
+ 16%|#5        | 6.55M/41.5M [01:49&lt;13:03, 46.7kB/s]
+ 16%|#5        | 6.56M/41.5M [01:49&lt;13:29, 45.2kB/s]
+ 16%|#5        | 6.58M/41.5M [01:49&lt;14:16, 42.7kB/s]
+ 16%|#5        | 6.59M/41.5M [01:50&lt;21:34, 28.3kB/s]
+ 16%|#5        | 6.59M/41.5M [01:50&lt;22:21, 27.3kB/s]
+ 16%|#5        | 6.60M/41.5M [01:51&lt;23:00, 26.5kB/s]
+ 16%|#5        | 6.61M/41.5M [01:51&lt;20:17, 30.0kB/s]
+ 16%|#5        | 6.62M/41.5M [01:51&lt;21:34, 28.2kB/s]
+ 16%|#5        | 6.62M/41.5M [01:51&lt;19:03, 32.0kB/s]
+ 16%|#5        | 6.63M/41.5M [01:52&lt;17:11, 35.4kB/s]
+ 16%|#6        | 6.64M/41.5M [01:52&lt;15:50, 38.5kB/s]
+ 16%|#6        | 6.65M/41.5M [01:52&lt;14:51, 41.0kB/s]
+ 16%|#6        | 6.66M/41.5M [01:52&lt;14:09, 43.0kB/s]
+ 16%|#6        | 6.67M/41.5M [01:52&lt;10:33, 57.7kB/s]
+ 16%|#6        | 6.68M/41.5M [01:52&lt;11:01, 55.2kB/s]
+ 16%|#6        | 6.69M/41.5M [01:53&lt;11:24, 53.3kB/s]
+ 16%|#6        | 6.70M/41.5M [01:53&lt;09:12, 66.0kB/s]
+ 16%|#6        | 6.72M/41.5M [01:53&lt;08:05, 75.1kB/s]
+ 16%|#6        | 6.73M/41.5M [01:53&lt;09:01, 67.4kB/s]
+ 16%|#6        | 6.75M/41.5M [01:53&lt;06:41, 90.6kB/s]
+ 16%|#6        | 6.77M/41.5M [01:53&lt;06:33, 92.6kB/s]
+ 16%|#6        | 6.79M/41.5M [01:54&lt;05:35, 109kB/s]
+ 16%|#6        | 6.81M/41.5M [01:54&lt;05:03, 120kB/s]
+ 16%|#6        | 6.84M/41.5M [01:54&lt;04:44, 128kB/s]
+ 17%|#6        | 6.87M/41.5M [01:54&lt;05:19, 114kB/s]
+ 17%|#6        | 6.92M/41.5M [01:54&lt;03:33, 170kB/s]
+ 17%|#6        | 6.95M/41.5M [01:55&lt;03:25, 176kB/s]
+ 17%|#6        | 6.98M/41.5M [01:55&lt;03:35, 168kB/s]
+ 17%|#6        | 7.00M/41.5M [01:55&lt;03:43, 162kB/s]
+ 17%|#6        | 7.02M/41.5M [01:55&lt;03:49, 157kB/s]
+ 17%|#6        | 7.04M/41.5M [01:55&lt;04:19, 139kB/s]
+ 17%|#7        | 7.07M/41.5M [01:55&lt;03:51, 156kB/s]
+ 17%|#7        | 7.09M/41.5M [01:56&lt;04:20, 139kB/s]
+ 17%|#7        | 7.10M/41.5M [01:56&lt;06:09, 97.5kB/s]
+ 17%|#7        | 7.12M/41.5M [01:56&lt;05:29, 109kB/s]
+ 17%|#7        | 7.14M/41.5M [01:56&lt;05:38, 106kB/s]
+ 17%|#7        | 7.16M/41.5M [01:56&lt;05:46, 104kB/s]
+ 17%|#7        | 7.17M/41.5M [01:57&lt;05:52, 102kB/s]
+ 17%|#7        | 7.19M/41.5M [01:57&lt;07:39, 78.4kB/s]
+ 17%|#7        | 7.20M/41.5M [01:57&lt;07:13, 82.9kB/s]
+ 17%|#7        | 7.22M/41.5M [01:57&lt;06:54, 86.6kB/s]
+ 17%|#7        | 7.23M/41.5M [01:58&lt;08:28, 70.6kB/s]
+ 17%|#7        | 7.26M/41.5M [01:58&lt;07:12, 82.9kB/s]
+ 18%|#7        | 7.27M/41.5M [01:58&lt;08:08, 73.4kB/s]
+ 18%|#7        | 7.29M/41.5M [01:58&lt;08:01, 74.5kB/s]
+ 18%|#7        | 7.30M/41.5M [01:59&lt;08:12, 72.8kB/s]
+ 18%|#7        | 7.31M/41.5M [01:59&lt;07:32, 79.2kB/s]
+ 18%|#7        | 7.32M/41.5M [01:59&lt;08:25, 70.9kB/s]
+ 18%|#7        | 7.34M/41.5M [01:59&lt;07:37, 78.3kB/s]
+ 18%|#7        | 7.35M/41.5M [01:59&lt;07:07, 83.7kB/s]
+ 18%|#7        | 7.37M/41.5M [01:59&lt;07:49, 76.2kB/s]
+ 18%|#7        | 7.38M/41.5M [02:00&lt;06:47, 87.8kB/s]
+ 18%|#7        | 7.40M/41.5M [02:00&lt;10:23, 57.4kB/s]
+ 18%|#7        | 7.42M/41.5M [02:00&lt;07:51, 75.7kB/s]
+ 18%|#7        | 7.44M/41.5M [02:01&lt;09:55, 60.0kB/s]
+ 18%|#7        | 7.45M/41.5M [02:01&lt;08:50, 67.2kB/s]
+ 18%|#8        | 7.47M/41.5M [02:01&lt;09:49, 60.6kB/s]
... 10934 lines suppressed ...