You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/05/17 10:46:19 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@de21c8f2ef507587fdcc99b851404de5aeeb5a16)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new eb0d6feae deploying docs (apache/tvm@de21c8f2ef507587fdcc99b851404de5aeeb5a16)
eb0d6feae is described below

commit eb0d6feaeae89d7cb5b07840914ba5f19f95ef73
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Tue May 17 10:46:15 2022 +0000

    deploying docs (apache/tvm@de21c8f2ef507587fdcc99b851404de5aeeb5a16)
---
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    7 +-
 .../how_to/compile_models/from_paddle.rst.txt      |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    5 -
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   18 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    4 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |   10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   16 +-
 .../tune_conv2d_layer_cuda.rst.txt                 | 1940 ++------------------
 .../tune_network_cuda.rst.txt                      |    2 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |  172 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |   12 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   34 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |   12 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    8 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   18 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    6 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |    9 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   56 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   26 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   44 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       | 1874 ++++++++++++++++++-
 docs/how_to/compile_models/from_paddle.html        |    2 +-
 docs/how_to/compile_models/from_pytorch.html       |    6 +-
 docs/how_to/compile_models/from_tensorflow.html    |    1 -
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   63 +-
 docs/how_to/deploy_models/deploy_prequantized.html |   11 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   34 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   18 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    4 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |   10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    | 1940 ++------------------
 .../tune_with_autoscheduler/tune_network_cuda.html |    2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |  172 +-
 .../tune_with_autotvm/sg_execution_times.html      |   12 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   34 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |   12 +-
 .../how_to/work_with_relay/sg_execution_times.html |    8 +-
 .../work_with_schedules/sg_execution_times.html    |   18 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    6 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    5 +-
 docs/tutorial/autotvm_relay_x86.html               |  258 +--
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   26 +-
 docs/tutorial/tensor_expr_get_started.html         |   44 +-
 115 files changed, 2995 insertions(+), 4708 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index cdbfd724e..4c51366bc 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -98,7 +98,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip2e9fcbdd-04fb-48d4-9551-2fca667db007 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip241648fa-9037-4f2c-98f8-146ee42e6cc7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index c10c7c0f8..6355c0db5 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -100,7 +100,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<07:45, 93.5kB/s]
      0%|          | 48.0k/41.5M [00:00<04:53, 148kB/s] 
      0%|          | 96.0k/41.5M [00:00<03:28, 208kB/s]
      0%|          | 168k/41.5M [00:00<02:28, 291kB/s] 
      1%|          | 336k/41.5M [00:00<01:20, 538kB/s]
      2%|1         | 648k/41.5M [00:01<00:44, 971kB/s]
      3%|3         | 1.27M/41.5M [00:01<00:22, 1.88MB/s]
      6%|6         | 2.53M/41.5M [00:01<00:11, 3.68MB/s]
     10%|9         | 4.03M/41.5M [00:01<00:07, 5.32MB/s]
     13%|#3        | 5.52M/41.5M [00:01<00:05, 7.14MB/s]
     16%|#5        | 6.59M/41.5M [00:01<00:04, 8.01MB/s]
     18%|#7        | 7.44M/41.5M [00:01<00:04, 7.54MB/s]
     20%|#9        | 8.21M/41.5M [00:02<00:05, 6.01MB/s]
     23%|##2       | 9.52M/41.5M [00:02<00:05, 6.58MB/s]
     25%|##4       | 10.3M/41.5M [00:02<00:05, 6.42MB/s]
     28%|##8       | 11.8M/41.5M [00:02<00:04, 7.23MB/s]
     32%|###1      | 13.3M/41.5M [00:02<00
 :03, 7.76MB/s]
     35%|###4      | 14.4M/41.5M [00:03<00:04, 6.95MB/s]
     38%|###8      | 15.9M/41.5M [00:03<00:03, 7.52MB/s]
     40%|####      | 16.7M/41.5M [00:03<00:03, 6.65MB/s]
     44%|####3     | 18.1M/41.5M [00:03<00:03, 7.24MB/s]
     47%|####7     | 19.6M/41.5M [00:03<00:02, 7.71MB/s]
     51%|#####     | 21.1M/41.5M [00:03<00:02, 8.07MB/s]
     55%|#####4    | 22.6M/41.5M [00:04<00:02, 8.32MB/s]
     58%|#####8    | 24.1M/41.5M [00:04<00:01, 9.53MB/s]
     62%|######1   | 25.6M/41.5M [00:04<00:01, 10.2MB/s]
     64%|######4   | 26.6M/41.5M [00:04<00:02, 7.20MB/s]
     67%|######6   | 27.6M/41.5M [00:04<00:02, 6.90MB/s]
     68%|######8   | 28.4M/41.5M [00:04<00:02, 6.23MB/s]
     71%|#######1  | 29.5M/41.5M [00:05<00:02, 6.27MB/s]
     74%|#######3  | 30.6M/41.5M [00:05<00:01, 6.32MB/s]
     76%|#######6  | 31.7M/41.5M [00:05<00:01, 6.39MB/s]
     79%|#######9  | 32.8M/41.5M [00:05<00:01, 6.95MB/s]
     82%|########1 | 33.9M/41.5M [00:05<00:01, 7.26MB/s]
     84%|####
 ####4 | 35.0M/41.5M [00:05<00:00, 7.11MB/s]
     87%|########7 | 36.2M/41.5M [00:06<00:00, 7.65MB/s]
     89%|########9 | 36.9M/41.5M [00:06<00:00, 7.29MB/s]
     91%|######### | 37.7M/41.5M [00:06<00:00, 6.29MB/s]
     93%|#########2| 38.5M/41.5M [00:06<00:00, 5.96MB/s]
     94%|#########4| 39.1M/41.5M [00:06<00:00, 5.12MB/s]
     97%|#########6| 40.1M/41.5M [00:06<00:00, 5.32MB/s]
     99%|#########8| 40.9M/41.5M [00:07<00:00, 5.20MB/s]
    100%|##########| 41.5M/41.5M [00:07<00:00, 6.12MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<15:35, 46.5kB/s]
      0%|          | 56.0k/41.5M [00:00<05:50, 124kB/s] 
      0%|          | 72.0k/41.5M [00:00<06:26, 112kB/s]
      0%|          | 88.0k/41.5M [00:00<06:51, 105kB/s]
      0%|          | 104k/41.5M [00:01<07:08, 101kB/s] 
      0%|          | 120k/41.5M [00:01<07:20, 98.5kB/s]
      0%|          | 136k/41.5M [00:01<07:28, 96.7kB/s]
      0%|          | 152k/41.5M [00:01<07:33, 95.5kB/s]
      0%|          | 168k/41.5M [00:01<07:37, 94.6kB/s]
      0%|          | 184k/41.5M [00:01<07:40, 94.1kB/s]
      0%|          | 200k/41.5M [00:02<10:02, 71.9kB/s]
      1%|          | 216k/41.5M [00:02<11:41, 61.7kB/s]
      1%|          | 240k/41.5M [00:02<09:08, 78.9kB/s]
      1%|          | 256k/41.5M [00:03<15:05, 47.7kB/s]
      1%|          | 264k/41.5M [00:03<15:09, 47.5kB/s]
      1%|          | 272k/41.5M [00:03<15:14, 47.3kB/s]
      1%|          | 280k/41.5M [00:04<15:17, 47.1
 kB/s]
      1%|          | 288k/41.5M [00:04<15:20, 46.9kB/s]
      1%|          | 296k/41.5M [00:04<15:23, 46.8kB/s]
      1%|          | 304k/41.5M [00:04<15:25, 46.7kB/s]
      1%|          | 312k/41.5M [00:04<15:26, 46.6kB/s]
      1%|          | 320k/41.5M [00:04<16:28, 43.7kB/s]
      1%|          | 328k/41.5M [00:05<19:36, 36.7kB/s]
      1%|          | 336k/41.5M [00:05<18:24, 39.1kB/s]
      1%|          | 352k/41.5M [00:05<14:23, 50.0kB/s]
      1%|          | 360k/41.5M [00:06<21:21, 33.7kB/s]
      1%|          | 376k/41.5M [00:06<20:26, 35.2kB/s]
      1%|          | 384k/41.5M [00:06<19:18, 37.2kB/s]
      1%|          | 392k/41.5M [00:07<21:16, 33.8kB/s]
      1%|          | 400k/41.5M [00:07<23:31, 30.5kB/s]
      1%|          | 408k/41.5M [00:07<21:37, 33.2kB/s]
      1%|          | 416k/41.5M [00:07<23:56, 30.0kB/s]
      1%|          | 424k/41.5M [00:08<21:31, 33.3kB/s]
      1%|1         | 432k/41.5M [00:08<19:46, 36.3kB/s]
      1%|1         | 440k/41.5M [00:08<
 23:02, 31.1kB/s]
      1%|1         | 456k/41.5M [00:08<18:05, 39.6kB/s]
      1%|1         | 464k/41.5M [00:09<19:03, 37.6kB/s]
      1%|1         | 472k/41.5M [00:09<18:07, 39.6kB/s]
      1%|1         | 480k/41.5M [00:09<17:23, 41.2kB/s]
      1%|1         | 488k/41.5M [00:09<16:51, 42.5kB/s]
      1%|1         | 496k/41.5M [00:09<16:27, 43.6kB/s]
      1%|1         | 504k/41.5M [00:10<14:15, 50.3kB/s]
      1%|1         | 512k/41.5M [00:10<14:36, 49.1kB/s]
      1%|1         | 528k/41.5M [00:10<12:57, 55.2kB/s]
      1%|1         | 536k/41.5M [00:10<13:34, 52.8kB/s]
      1%|1         | 544k/41.5M [00:10<13:18, 53.8kB/s]
      1%|1         | 560k/41.5M [00:10<11:31, 62.0kB/s]
      1%|1         | 568k/41.5M [00:11<11:46, 60.8kB/s]
      1%|1         | 584k/41.5M [00:11<10:42, 66.7kB/s]
      1%|1         | 592k/41.5M [00:11<11:07, 64.3kB/s]
      1%|1         | 608k/41.5M [00:11<09:47, 73.0kB/s]
      1%|1         | 624k/41.5M [00:11<09:02, 79.0kB/s]
      2%|1         | 640k/41
 .5M [00:12<08:35, 83.2kB/s]
      2%|1         | 656k/41.5M [00:12<08:17, 86.1kB/s]
      2%|1         | 672k/41.5M [00:12<08:06, 88.1kB/s]
      2%|1         | 688k/41.5M [00:12<07:58, 89.5kB/s]
      2%|1         | 704k/41.5M [00:12<10:15, 69.6kB/s]
      2%|1         | 736k/41.5M [00:13<07:35, 93.9kB/s]
      2%|1         | 752k/41.5M [00:13<07:36, 93.5kB/s]
      2%|1         | 768k/41.5M [00:13<07:37, 93.3kB/s]
      2%|1         | 784k/41.5M [00:13<07:38, 93.2kB/s]
      2%|1         | 800k/41.5M [00:13<07:38, 93.1kB/s]
      2%|1         | 816k/41.5M [00:13<07:38, 93.0kB/s]
      2%|1         | 832k/41.5M [00:14<07:13, 98.4kB/s]
      2%|1         | 848k/41.5M [00:14<09:35, 74.1kB/s]
      2%|2         | 880k/41.5M [00:14<07:17, 97.3kB/s]
      2%|2         | 896k/41.5M [00:14<07:23, 95.9kB/s]
      2%|2         | 912k/41.5M [00:15<07:27, 95.1kB/s]
      2%|2         | 928k/41.5M [00:15<07:29, 94.7kB/s]
      2%|2         | 944k/41.5M [00:15<07:31, 94.1kB/s]
      2%|2       
   | 960k/41.5M [00:15<11:56, 59.4kB/s]
      2%|2         | 0.98M/41.5M [00:16<07:27, 95.0kB/s]
      2%|2         | 0.99M/41.5M [00:16<07:29, 94.5kB/s]
      2%|2         | 1.01M/41.5M [00:16<09:22, 75.5kB/s]
      2%|2         | 1.03M/41.5M [00:16<07:53, 89.5kB/s]
      3%|2         | 1.05M/41.5M [00:16<07:49, 90.3kB/s]
      3%|2         | 1.06M/41.5M [00:17<07:46, 90.9kB/s]
      3%|2         | 1.08M/41.5M [00:17<07:43, 91.4kB/s]
      3%|2         | 1.09M/41.5M [00:17<09:48, 71.9kB/s]
      3%|2         | 1.11M/41.5M [00:17<09:41, 72.8kB/s]
      3%|2         | 1.12M/41.5M [00:18<10:01, 70.4kB/s]
      3%|2         | 1.13M/41.5M [00:18<09:13, 76.4kB/s]
      3%|2         | 1.14M/41.5M [00:18<10:21, 68.1kB/s]
      3%|2         | 1.16M/41.5M [00:18<09:23, 75.1kB/s]
      3%|2         | 1.17M/41.5M [00:18<08:47, 80.2kB/s]
      3%|2         | 1.18M/41.5M [00:18<10:01, 70.3kB/s]
      3%|2         | 1.20M/41.5M [00:19<11:50, 59.4kB/s]
      3%|2         | 1.20M/41.5M [00:19<12:31,
  56.2kB/s]
      3%|2         | 1.23M/41.5M [00:19<09:03, 77.7kB/s]
      3%|2         | 1.24M/41.5M [00:19<09:10, 76.7kB/s]
      3%|3         | 1.25M/41.5M [00:19<09:34, 73.4kB/s]
      3%|3         | 1.27M/41.5M [00:20<09:30, 73.9kB/s]
      3%|3         | 1.27M/41.5M [00:20<09:54, 71.0kB/s]
      3%|3         | 1.29M/41.5M [00:20<09:04, 77.5kB/s]
      3%|3         | 1.30M/41.5M [00:20<08:33, 82.1kB/s]
      3%|3         | 1.31M/41.5M [00:20<10:33, 66.5kB/s]
      3%|3         | 1.33M/41.5M [00:21<09:29, 74.0kB/s]
      3%|3         | 1.34M/41.5M [00:21<09:27, 74.2kB/s]
      3%|3         | 1.35M/41.5M [00:21<09:50, 71.3kB/s]
      3%|3         | 1.36M/41.5M [00:21<10:59, 63.8kB/s]
      3%|3         | 1.38M/41.5M [00:21<09:40, 72.5kB/s]
      3%|3         | 1.39M/41.5M [00:21<08:55, 78.6kB/s]
      3%|3         | 1.41M/41.5M [00:22<08:27, 82.8kB/s]
      3%|3         | 1.42M/41.5M [00:22<08:09, 85.8kB/s]
      3%|3         | 1.44M/41.5M [00:22<07:57, 87.9kB/s]
      4%|3       
   | 1.45M/41.5M [00:22<07:49, 89.3kB/s]
      4%|3         | 1.47M/41.5M [00:23<10:03, 69.5kB/s]
      4%|3         | 1.48M/41.5M [00:23<10:58, 63.7kB/s]
      4%|3         | 1.48M/41.5M [00:23<14:54, 46.9kB/s]
      4%|3         | 1.51M/41.5M [00:23<12:37, 55.3kB/s]
      4%|3         | 1.52M/41.5M [00:24<13:04, 53.5kB/s]
      4%|3         | 1.52M/41.5M [00:24<13:28, 51.8kB/s]
      4%|3         | 1.53M/41.5M [00:24<13:50, 50.5kB/s]
      4%|3         | 1.54M/41.5M [00:24<14:07, 49.4kB/s]
      4%|3         | 1.55M/41.5M [00:24<12:09, 57.4kB/s]
      4%|3         | 1.56M/41.5M [00:25<18:49, 37.1kB/s]
      4%|3         | 1.59M/41.5M [00:25<13:12, 52.8kB/s]
      4%|3         | 1.59M/41.5M [00:25<12:51, 54.2kB/s]
      4%|3         | 1.60M/41.5M [00:25<13:19, 52.3kB/s]
      4%|3         | 1.61M/41.5M [00:26<17:13, 40.5kB/s]
      4%|3         | 1.62M/41.5M [00:26<17:04, 40.8kB/s]
      4%|3         | 1.64M/41.5M [00:27<16:19, 42.7kB/s]
      4%|3         | 1.65M/41.5M [00:27<16:02
 , 43.4kB/s]
      4%|3         | 1.66M/41.5M [00:27<15:48, 44.0kB/s]
      4%|4         | 1.66M/41.5M [00:27<15:36, 44.6kB/s]
      4%|4         | 1.67M/41.5M [00:27<15:27, 45.0kB/s]
      4%|4         | 1.68M/41.5M [00:27<15:19, 45.4kB/s]
      4%|4         | 1.69M/41.5M [00:28<19:25, 35.8kB/s]
      4%|4         | 1.70M/41.5M [00:28<18:43, 37.1kB/s]
      4%|4         | 1.72M/41.5M [00:28<14:21, 48.4kB/s]
      4%|4         | 1.73M/41.5M [00:29<17:44, 39.2kB/s]
      4%|4         | 1.73M/41.5M [00:29<17:05, 40.7kB/s]
      4%|4         | 1.74M/41.5M [00:29<16:33, 42.0kB/s]
      4%|4         | 1.75M/41.5M [00:29<16:07, 43.0kB/s]
      4%|4         | 1.76M/41.5M [00:29<15:48, 43.9kB/s]
      4%|4         | 1.77M/41.5M [00:30<16:35, 41.9kB/s]
      4%|4         | 1.77M/41.5M [00:30<15:05, 46.0kB/s]
      4%|4         | 1.78M/41.5M [00:30<15:02, 46.1kB/s]
      4%|4         | 1.79M/41.5M [00:30<15:01, 46.2kB/s]
      4%|4         | 1.80M/41.5M [00:30<14:59, 46.3kB/s]
      4%|4      
    | 1.81M/41.5M [00:30<11:33, 60.0kB/s]
      4%|4         | 1.82M/41.5M [00:31<12:23, 56.0kB/s]
      4%|4         | 1.83M/41.5M [00:31<13:03, 53.1kB/s]
      4%|4         | 1.84M/41.5M [00:31<10:40, 64.9kB/s]
      4%|4         | 1.85M/41.5M [00:31<11:39, 59.4kB/s]
      5%|4         | 1.87M/41.5M [00:31<09:58, 69.4kB/s]
      5%|4         | 1.88M/41.5M [00:32<09:03, 76.3kB/s]
      5%|4         | 1.89M/41.5M [00:32<10:16, 67.4kB/s]
      5%|4         | 1.91M/41.5M [00:32<09:13, 75.0kB/s]
      5%|4         | 1.92M/41.5M [00:32<08:36, 80.3kB/s]
      5%|4         | 1.93M/41.5M [00:32<09:51, 70.2kB/s]
      5%|4         | 1.95M/41.5M [00:32<08:59, 76.8kB/s]
      5%|4         | 1.96M/41.5M [00:33<08:27, 81.6kB/s]
      5%|4         | 1.98M/41.5M [00:33<10:33, 65.4kB/s]
      5%|4         | 2.00M/41.5M [00:33<08:15, 83.6kB/s]
      5%|4         | 2.02M/41.5M [00:33<08:01, 86.0kB/s]
      5%|4         | 2.03M/41.5M [00:33<07:51, 87.8kB/s]
      5%|4         | 2.05M/41.5M [00:34<07:4
 3, 89.2kB/s]
      5%|4         | 2.06M/41.5M [00:34<07:38, 90.2kB/s]
      5%|5         | 2.09M/41.5M [00:34<06:36, 104kB/s] 
      5%|5         | 2.10M/41.5M [00:34<06:49, 101kB/s]
      5%|5         | 2.12M/41.5M [00:34<06:58, 98.5kB/s]
      5%|5         | 2.13M/41.5M [00:35<09:12, 74.7kB/s]
      5%|5         | 2.16M/41.5M [00:35<07:35, 90.6kB/s]
      5%|5         | 2.17M/41.5M [00:35<07:32, 91.1kB/s]
      5%|5         | 2.19M/41.5M [00:35<07:30, 91.6kB/s]
      5%|5         | 2.20M/41.5M [00:35<07:28, 91.9kB/s]
      5%|5         | 2.22M/41.5M [00:36<07:26, 92.2kB/s]
      5%|5         | 2.23M/41.5M [00:36<07:25, 92.3kB/s]
      5%|5         | 2.26M/41.5M [00:36<06:28, 106kB/s] 
      5%|5         | 2.27M/41.5M [00:36<08:41, 78.9kB/s]
      6%|5         | 2.30M/41.5M [00:36<07:18, 93.6kB/s]
      6%|5         | 2.31M/41.5M [00:37<07:19, 93.4kB/s]
      6%|5         | 2.33M/41.5M [00:37<07:20, 93.2kB/s]
      6%|5         | 2.34M/41.5M [00:37<07:20, 93.1kB/s]
      6%|5      
    | 2.36M/41.5M [00:37<07:21, 93.0kB/s]
      6%|5         | 2.38M/41.5M [00:37<07:21, 92.9kB/s]
      6%|5         | 2.39M/41.5M [00:38<07:54, 86.3kB/s]
      6%|5         | 2.41M/41.5M [00:38<07:12, 94.9kB/s]
      6%|5         | 2.42M/41.5M [00:38<07:14, 94.2kB/s]
      6%|5         | 2.44M/41.5M [00:38<08:22, 81.6kB/s]
      6%|5         | 2.45M/41.5M [00:38<09:09, 74.5kB/s]
      6%|5         | 2.46M/41.5M [00:39<12:42, 53.6kB/s]
      6%|5         | 2.47M/41.5M [00:39<12:20, 55.3kB/s]
      6%|5         | 2.48M/41.5M [00:39<11:13, 60.8kB/s]
      6%|6         | 2.49M/41.5M [00:39<11:56, 57.1kB/s]
      6%|6         | 2.50M/41.5M [00:39<11:40, 58.3kB/s]
      6%|6         | 2.52M/41.5M [00:40<10:43, 63.5kB/s]
      6%|6         | 2.52M/41.5M [00:40<11:35, 58.7kB/s]
      6%|6         | 2.53M/41.5M [00:40<11:24, 59.6kB/s]
      6%|6         | 2.55M/41.5M [00:40<10:32, 64.6kB/s]
      6%|6         | 2.55M/41.5M [00:40<10:36, 64.2kB/s]
      6%|6         | 2.57M/41.5M [00:40<09:1
 9, 73.0kB/s]
      6%|6         | 2.59M/41.5M [00:41<08:36, 79.0kB/s]
      6%|6         | 2.59M/41.5M [00:41<09:50, 69.1kB/s]
      6%|6         | 2.61M/41.5M [00:41<11:36, 58.5kB/s]
      6%|6         | 2.63M/41.5M [00:41<08:38, 78.6kB/s]
      6%|6         | 2.65M/41.5M [00:42<08:49, 77.0kB/s]
      6%|6         | 2.66M/41.5M [00:42<09:10, 74.0kB/s]
      6%|6         | 2.66M/41.5M [00:42<10:15, 66.2kB/s]
      6%|6         | 2.67M/41.5M [00:42<11:12, 60.6kB/s]
      6%|6         | 2.69M/41.5M [00:42<12:32, 54.1kB/s]
      6%|6         | 2.70M/41.5M [00:43<13:53, 48.8kB/s]
      7%|6         | 2.71M/41.5M [00:43<12:54, 52.5kB/s]
      7%|6         | 2.72M/41.5M [00:43<13:16, 51.1kB/s]
      7%|6         | 2.73M/41.5M [00:43<14:15, 47.5kB/s]
      7%|6         | 2.75M/41.5M [00:44<11:48, 57.4kB/s]
      7%|6         | 2.76M/41.5M [00:44<12:21, 54.8kB/s]
      7%|6         | 2.77M/41.5M [00:44<12:51, 52.6kB/s]
      7%|6         | 2.78M/41.5M [00:44<10:41, 63.3kB/s]
      7%|6     
     | 2.79M/41.5M [00:44<11:31, 58.6kB/s]
      7%|6         | 2.80M/41.5M [00:45<09:54, 68.3kB/s]
      7%|6         | 2.82M/41.5M [00:45<08:58, 75.3kB/s]
      7%|6         | 2.83M/41.5M [00:45<10:06, 66.9kB/s]
      7%|6         | 2.84M/41.5M [00:45<09:04, 74.5kB/s]
      7%|6         | 2.86M/41.5M [00:45<10:56, 61.7kB/s]
      7%|6         | 2.88M/41.5M [00:46<08:21, 80.7kB/s]
      7%|6         | 2.90M/41.5M [00:46<08:02, 83.8kB/s]
      7%|7         | 2.91M/41.5M [00:46<07:49, 86.2kB/s]
      7%|7         | 2.93M/41.5M [00:46<07:39, 87.9kB/s]
      7%|7         | 2.95M/41.5M [00:46<07:32, 89.3kB/s]
      7%|7         | 2.96M/41.5M [00:47<09:36, 70.1kB/s]
      7%|7         | 2.98M/41.5M [00:47<07:45, 86.8kB/s]
      7%|7         | 3.00M/41.5M [00:47<08:04, 83.3kB/s]
      7%|7         | 3.02M/41.5M [00:47<09:23, 71.6kB/s]
      7%|7         | 3.03M/41.5M [00:48<09:15, 72.6kB/s]
      7%|7         | 3.04M/41.5M [00:48<10:08, 66.2kB/s]
      7%|7         | 3.05M/41.5M [00:48<11:
 01, 60.9kB/s]
      7%|7         | 3.07M/41.5M [00:48<09:48, 68.5kB/s]
      7%|7         | 3.08M/41.5M [00:48<11:17, 59.4kB/s]
      7%|7         | 3.09M/41.5M [00:49<11:57, 56.1kB/s]
      7%|7         | 3.09M/41.5M [00:49<12:32, 53.5kB/s]
      7%|7         | 3.10M/41.5M [00:49<13:00, 51.6kB/s]
      7%|7         | 3.11M/41.5M [00:49<13:23, 50.1kB/s]
      8%|7         | 3.12M/41.5M [00:49<13:40, 49.0kB/s]
      8%|7         | 3.13M/41.5M [00:50<14:00, 47.8kB/s]
      8%|7         | 3.15M/41.5M [00:50<11:25, 58.7kB/s]
      8%|7         | 3.16M/41.5M [00:50<12:04, 55.5kB/s]
      8%|7         | 3.16M/41.5M [00:50<12:35, 53.2kB/s]
      8%|7         | 3.18M/41.5M [00:51<13:21, 50.1kB/s]
      8%|7         | 3.20M/41.5M [00:51<11:06, 60.2kB/s]
      8%|7         | 3.20M/41.5M [00:51<11:47, 56.7kB/s]
      8%|7         | 3.21M/41.5M [00:51<12:23, 54.0kB/s]
      8%|7         | 3.22M/41.5M [00:51<12:53, 51.9kB/s]
      8%|7         | 3.23M/41.5M [00:52<17:01, 39.3kB/s]
      8%|7    
      | 3.25M/41.5M [00:52<10:35, 63.1kB/s]
      8%|7         | 3.26M/41.5M [00:52<11:21, 58.9kB/s]
      8%|7         | 3.27M/41.5M [00:52<12:27, 53.6kB/s]
      8%|7         | 3.29M/41.5M [00:53<10:39, 62.7kB/s]
      8%|7         | 3.30M/41.5M [00:53<11:23, 58.6kB/s]
      8%|7         | 3.30M/41.5M [00:53<12:02, 55.4kB/s]
      8%|8         | 3.32M/41.5M [00:53<11:34, 57.6kB/s]
      8%|8         | 3.34M/41.5M [00:53<10:00, 66.6kB/s]
      8%|8         | 3.34M/41.5M [00:54<10:53, 61.2kB/s]
      8%|8         | 3.35M/41.5M [00:54<11:40, 57.1kB/s]
      8%|8         | 3.37M/41.5M [00:54<09:54, 67.2kB/s]
      8%|8         | 3.38M/41.5M [00:54<08:55, 74.6kB/s]
      8%|8         | 3.40M/41.5M [00:54<08:20, 79.8kB/s]
      8%|8         | 3.41M/41.5M [00:54<09:30, 70.0kB/s]
      8%|8         | 3.42M/41.5M [00:55<08:40, 76.8kB/s]
      8%|8         | 3.44M/41.5M [00:55<08:09, 81.5kB/s]
      8%|8         | 3.45M/41.5M [00:55<07:49, 84.9kB/s]
      8%|8         | 3.47M/41.5M [00:55<07
 :37, 87.2kB/s]
      8%|8         | 3.48M/41.5M [00:55<08:34, 77.4kB/s]
      8%|8         | 3.49M/41.5M [00:56<09:00, 73.8kB/s]
      8%|8         | 3.52M/41.5M [00:56<07:38, 86.9kB/s]
      9%|8         | 3.53M/41.5M [00:56<07:29, 88.5kB/s]
      9%|8         | 3.55M/41.5M [00:56<09:30, 69.7kB/s]
      9%|8         | 3.57M/41.5M [00:56<07:13, 91.8kB/s]
      9%|8         | 3.59M/41.5M [00:57<07:39, 86.5kB/s]
      9%|8         | 3.60M/41.5M [00:57<07:30, 88.2kB/s]
      9%|8         | 3.62M/41.5M [00:57<08:28, 78.1kB/s]
      9%|8         | 3.63M/41.5M [00:57<08:04, 81.9kB/s]
      9%|8         | 3.65M/41.5M [00:57<07:48, 84.7kB/s]
      9%|8         | 3.66M/41.5M [00:58<07:36, 87.0kB/s]
      9%|8         | 3.68M/41.5M [00:58<09:34, 69.1kB/s]
      9%|8         | 3.70M/41.5M [00:58<08:50, 74.7kB/s]
      9%|8         | 3.70M/41.5M [00:58<09:46, 67.5kB/s]
      9%|8         | 3.72M/41.5M [00:59<09:27, 69.8kB/s]
      9%|8         | 3.73M/41.5M [00:59<09:43, 67.9kB/s]
      9%|9   
       | 3.73M/41.5M [00:59<13:45, 48.0kB/s]
      9%|9         | 3.76M/41.5M [00:59<10:32, 62.6kB/s]
      9%|9         | 3.77M/41.5M [00:59<09:27, 69.7kB/s]
      9%|9         | 3.79M/41.5M [01:00<09:18, 70.7kB/s]
      9%|9         | 3.80M/41.5M [01:00<12:05, 54.5kB/s]
      9%|9         | 3.82M/41.5M [01:00<09:24, 69.9kB/s]
      9%|9         | 3.83M/41.5M [01:00<10:14, 64.3kB/s]
      9%|9         | 3.84M/41.5M [01:01<09:11, 71.5kB/s]
      9%|9         | 3.85M/41.5M [01:01<10:08, 64.8kB/s]
      9%|9         | 3.87M/41.5M [01:01<11:33, 56.8kB/s]
      9%|9         | 3.89M/41.5M [01:01<08:37, 76.2kB/s]
      9%|9         | 3.91M/41.5M [01:01<08:39, 75.8kB/s]
      9%|9         | 3.91M/41.5M [01:02<09:01, 72.8kB/s]
      9%|9         | 3.92M/41.5M [01:02<10:01, 65.4kB/s]
      9%|9         | 3.94M/41.5M [01:02<08:58, 73.2kB/s]
     10%|9         | 3.95M/41.5M [01:02<08:19, 78.8kB/s]
     10%|9         | 3.97M/41.5M [01:02<08:27, 77.5kB/s]
     10%|9         | 3.98M/41.5M [01:03<0
 8:00, 81.8kB/s]
     10%|9         | 4.00M/41.5M [01:03<07:42, 84.9kB/s]
     10%|9         | 4.02M/41.5M [01:03<07:30, 87.2kB/s]
     10%|9         | 4.03M/41.5M [01:03<07:22, 88.8kB/s]
     10%|9         | 4.05M/41.5M [01:03<06:45, 96.8kB/s]
     10%|9         | 4.06M/41.5M [01:03<06:50, 95.5kB/s]
     10%|9         | 4.08M/41.5M [01:04<06:54, 94.7kB/s]
     10%|9         | 4.10M/41.5M [01:04<06:02, 108kB/s] 
     10%|9         | 4.12M/41.5M [01:04<06:18, 104kB/s]
     10%|9         | 4.13M/41.5M [01:04<06:30, 100kB/s]
     10%|#         | 4.16M/41.5M [01:04<05:49, 112kB/s]
     10%|#         | 4.18M/41.5M [01:04<05:25, 120kB/s]
     10%|#         | 4.20M/41.5M [01:05<05:10, 126kB/s]
     10%|#         | 4.23M/41.5M [01:05<05:00, 130kB/s]
     10%|#         | 4.24M/41.5M [01:05<05:52, 111kB/s]
     10%|#         | 4.27M/41.5M [01:05<05:05, 128kB/s]
     10%|#         | 4.29M/41.5M [01:05<05:19, 122kB/s]
     10%|#         | 4.30M/41.5M [01:06<05:42, 114kB/s]
     10%|#         | 4
 .33M/41.5M [01:06<05:21, 121kB/s]
     10%|#         | 4.34M/41.5M [01:06<05:21, 121kB/s]
     11%|#         | 4.37M/41.5M [01:06<05:06, 127kB/s]
     11%|#         | 4.38M/41.5M [01:06<05:59, 108kB/s]
     11%|#         | 4.41M/41.5M [01:06<05:09, 126kB/s]
     11%|#         | 4.43M/41.5M [01:07<05:20, 121kB/s]
     11%|#         | 4.45M/41.5M [01:07<05:43, 113kB/s]
     11%|#         | 4.47M/41.5M [01:07<05:21, 121kB/s]
     11%|#         | 4.48M/41.5M [01:07<06:17, 103kB/s]
     11%|#         | 4.50M/41.5M [01:07<05:54, 109kB/s]
     11%|#         | 4.52M/41.5M [01:07<05:27, 118kB/s]
     11%|#         | 4.54M/41.5M [01:08<05:50, 111kB/s]
     11%|#         | 4.55M/41.5M [01:08<06:07, 105kB/s]
     11%|#1        | 4.57M/41.5M [01:08<06:56, 92.9kB/s]
     11%|#1        | 4.59M/41.5M [01:08<06:49, 94.6kB/s]
     11%|#1        | 4.60M/41.5M [01:08<06:51, 94.1kB/s]
     11%|#1        | 4.62M/41.5M [01:09<07:00, 91.9kB/s]
     11%|#1        | 4.63M/41.5M [01:09<06:59, 92.2kB/s]
     1
 1%|#1        | 4.65M/41.5M [01:09<06:50, 94.2kB/s]
     11%|#1        | 4.66M/41.5M [01:09<06:51, 93.7kB/s]
     11%|#1        | 4.68M/41.5M [01:09<06:53, 93.4kB/s]
     11%|#1        | 4.70M/41.5M [01:09<06:01, 107kB/s] 
     11%|#1        | 4.71M/41.5M [01:10<05:46, 111kB/s]
     11%|#1        | 4.73M/41.5M [01:10<05:34, 115kB/s]
     11%|#1        | 4.74M/41.5M [01:10<05:58, 107kB/s]
     11%|#1        | 4.76M/41.5M [01:10<06:07, 105kB/s]
     12%|#1        | 4.77M/41.5M [01:10<06:21, 101kB/s]
     12%|#1        | 4.79M/41.5M [01:10<05:39, 113kB/s]
     12%|#1        | 4.80M/41.5M [01:10<05:33, 115kB/s]
     12%|#1        | 4.82M/41.5M [01:11<05:13, 123kB/s]
     12%|#1        | 4.84M/41.5M [01:11<05:43, 112kB/s]
     12%|#1        | 4.85M/41.5M [01:11<05:12, 123kB/s]
     12%|#1        | 4.87M/41.5M [01:11<06:35, 97.2kB/s]
     12%|#1        | 4.89M/41.5M [01:11<05:48, 110kB/s] 
     12%|#1        | 4.91M/41.5M [01:11<06:05, 105kB/s]
     12%|#1        | 4.92M/41.5M [01:12<06:18
 , 101kB/s]
     12%|#1        | 4.95M/41.5M [01:12<05:40, 113kB/s]
     12%|#1        | 4.97M/41.5M [01:12<05:17, 121kB/s]
     12%|#2        | 4.98M/41.5M [01:12<05:40, 112kB/s]
     12%|#2        | 5.00M/41.5M [01:12<07:48, 81.7kB/s]
     12%|#2        | 5.02M/41.5M [01:13<06:38, 96.0kB/s]
     12%|#2        | 5.05M/41.5M [01:13<05:19, 119kB/s] 
     12%|#2        | 5.07M/41.5M [01:13<05:40, 112kB/s]
     12%|#2        | 5.09M/41.5M [01:13<05:57, 107kB/s]
     12%|#2        | 5.11M/41.5M [01:13<05:28, 116kB/s]
     12%|#2        | 5.12M/41.5M [01:14<06:26, 98.7kB/s]
     12%|#2        | 5.15M/41.5M [01:14<05:47, 110kB/s] 
     12%|#2        | 5.16M/41.5M [01:14<06:02, 105kB/s]
     12%|#2        | 5.18M/41.5M [01:14<05:36, 113kB/s]
     13%|#2        | 5.20M/41.5M [01:14<05:56, 107kB/s]
     13%|#2        | 5.21M/41.5M [01:14<06:11, 102kB/s]
     13%|#2        | 5.23M/41.5M [01:15<06:22, 99.5kB/s]
     13%|#2        | 5.24M/41.5M [01:15<06:29, 97.5kB/s]
     13%|#2        | 5.26M/
 41.5M [01:15<06:35, 96.0kB/s]
     13%|#2        | 5.27M/41.5M [01:15<05:50, 108kB/s] 
     13%|#2        | 5.29M/41.5M [01:15<05:39, 112kB/s]
     13%|#2        | 5.30M/41.5M [01:15<05:47, 109kB/s]
     13%|#2        | 5.32M/41.5M [01:15<05:34, 113kB/s]
     13%|#2        | 5.34M/41.5M [01:16<05:06, 124kB/s]
     13%|#2        | 5.35M/41.5M [01:16<04:56, 128kB/s]
     13%|#2        | 5.37M/41.5M [01:16<05:29, 115kB/s]
     13%|#2        | 5.38M/41.5M [01:16<07:23, 85.3kB/s]
     13%|#3        | 5.42M/41.5M [01:16<04:58, 127kB/s] 
     13%|#3        | 5.44M/41.5M [01:16<04:45, 132kB/s]
     13%|#3        | 5.45M/41.5M [01:17<05:15, 120kB/s]
     13%|#3        | 5.47M/41.5M [01:17<05:03, 124kB/s]
     13%|#3        | 5.48M/41.5M [01:17<07:59, 78.7kB/s]
     13%|#3        | 5.52M/41.5M [01:17<04:56, 127kB/s] 
     13%|#3        | 5.55M/41.5M [01:18<05:15, 119kB/s]
     13%|#3        | 5.56M/41.5M [01:18<06:34, 95.4kB/s]
     13%|#3        | 5.59M/41.5M [01:18<06:24, 97.9kB/s]
     14%
 |#3        | 5.60M/41.5M [01:18<06:29, 96.6kB/s]
     14%|#3        | 5.62M/41.5M [01:18<06:33, 95.6kB/s]
     14%|#3        | 5.63M/41.5M [01:19<06:36, 94.9kB/s]
     14%|#3        | 5.65M/41.5M [01:19<05:58, 105kB/s] 
     14%|#3        | 5.66M/41.5M [01:19<05:44, 109kB/s]
     14%|#3        | 5.68M/41.5M [01:19<06:01, 104kB/s]
     14%|#3        | 5.70M/41.5M [01:19<05:55, 106kB/s]
     14%|#3        | 5.71M/41.5M [01:19<05:40, 110kB/s]
     14%|#3        | 5.73M/41.5M [01:19<05:59, 104kB/s]
     14%|#3        | 5.74M/41.5M [01:20<06:12, 101kB/s]
     14%|#3        | 5.76M/41.5M [01:20<05:38, 111kB/s]
     14%|#3        | 5.77M/41.5M [01:20<05:29, 114kB/s]
     14%|#3        | 5.79M/41.5M [01:20<05:51, 107kB/s]
     14%|#3        | 5.80M/41.5M [01:20<05:18, 117kB/s]
     14%|#4        | 5.82M/41.5M [01:20<05:27, 114kB/s]
     14%|#4        | 5.84M/41.5M [01:20<05:21, 116kB/s]
     14%|#4        | 5.85M/41.5M [01:21<04:58, 125kB/s]
     14%|#4        | 5.87M/41.5M [01:21<05:01, 12
 4kB/s]
     14%|#4        | 5.88M/41.5M [01:21<04:47, 130kB/s]
     14%|#4        | 5.90M/41.5M [01:21<04:53, 127kB/s]
     14%|#4        | 5.91M/41.5M [01:21<07:27, 83.4kB/s]
     14%|#4        | 5.95M/41.5M [01:21<04:25, 140kB/s] 
     14%|#4        | 5.98M/41.5M [01:22<04:45, 130kB/s]
     14%|#4        | 6.00M/41.5M [01:22<04:53, 127kB/s]
     14%|#4        | 6.02M/41.5M [01:22<04:41, 132kB/s]
     15%|#4        | 6.03M/41.5M [01:22<05:10, 120kB/s]
     15%|#4        | 6.05M/41.5M [01:22<05:58, 104kB/s]
     15%|#4        | 6.06M/41.5M [01:23<06:09, 101kB/s]
     15%|#4        | 6.09M/41.5M [01:23<07:09, 86.5kB/s]
     15%|#4        | 6.11M/41.5M [01:23<05:52, 105kB/s] 
     15%|#4        | 6.12M/41.5M [01:23<06:03, 102kB/s]
     15%|#4        | 6.14M/41.5M [01:23<06:13, 99.3kB/s]
     15%|#4        | 6.16M/41.5M [01:24<06:20, 97.3kB/s]
     15%|#4        | 6.17M/41.5M [01:24<10:08, 60.8kB/s]
     15%|#4        | 6.20M/41.5M [01:24<07:06, 86.7kB/s]
     15%|#4        | 6.22M/41.
 5M [01:24<07:44, 79.7kB/s]
     15%|#5        | 6.23M/41.5M [01:25<07:27, 82.6kB/s]
     15%|#5        | 6.25M/41.5M [01:25<07:14, 85.1kB/s]
     15%|#5        | 6.27M/41.5M [01:25<07:04, 87.1kB/s]
     15%|#5        | 6.28M/41.5M [01:25<06:56, 88.6kB/s]
     15%|#5        | 6.30M/41.5M [01:25<06:50, 89.8kB/s]
     15%|#5        | 6.31M/41.5M [01:26<06:46, 90.6kB/s]
     15%|#5        | 6.33M/41.5M [01:26<07:47, 78.8kB/s]
     15%|#5        | 6.35M/41.5M [01:26<07:16, 84.4kB/s]
     15%|#5        | 6.37M/41.5M [01:26<07:05, 86.5kB/s]
     15%|#5        | 6.38M/41.5M [01:26<06:57, 88.2kB/s]
     15%|#5        | 6.40M/41.5M [01:27<06:51, 89.4kB/s]
     15%|#5        | 6.41M/41.5M [01:27<06:46, 90.4kB/s]
     15%|#5        | 6.43M/41.5M [01:27<06:43, 91.1kB/s]
     16%|#5        | 6.45M/41.5M [01:27<06:41, 91.6kB/s]
     16%|#5        | 6.46M/41.5M [01:27<08:36, 71.1kB/s]
     16%|#5        | 6.49M/41.5M [01:28<06:10, 99.1kB/s]
     16%|#5        | 6.51M/41.5M [01:28<06:17, 97.3kB/s]
 
     16%|#5        | 6.52M/41.5M [01:28<06:21, 96.1kB/s]
     16%|#5        | 6.54M/41.5M [01:28<06:24, 95.3kB/s]
     16%|#5        | 6.55M/41.5M [01:28<06:27, 94.6kB/s]
     16%|#5        | 6.57M/41.5M [01:29<06:29, 94.1kB/s]
     16%|#5        | 6.59M/41.5M [01:29<06:30, 93.7kB/s]
     16%|#5        | 6.60M/41.5M [01:29<06:31, 93.4kB/s]
     16%|#5        | 6.62M/41.5M [01:29<06:32, 93.2kB/s]
     16%|#5        | 6.63M/41.5M [01:29<06:32, 93.1kB/s]
     16%|#6        | 6.65M/41.5M [01:30<08:30, 71.6kB/s]
     16%|#6        | 6.68M/41.5M [01:30<06:05, 99.8kB/s]
     16%|#6        | 6.70M/41.5M [01:30<06:12, 97.9kB/s]
     16%|#6        | 6.71M/41.5M [01:30<08:01, 75.8kB/s]
     16%|#6        | 6.74M/41.5M [01:31<05:59, 101kB/s] 
     16%|#6        | 6.76M/41.5M [01:31<06:06, 99.3kB/s]
     16%|#6        | 6.77M/41.5M [01:31<06:13, 97.5kB/s]
     16%|#6        | 6.79M/41.5M [01:31<06:42, 90.4kB/s]
     16%|#6        | 6.80M/41.5M [01:31<07:09, 84.6kB/s]
     16%|#6        | 6.82M/41
 .5M [01:31<06:27, 93.7kB/s]
     16%|#6        | 6.84M/41.5M [01:32<06:28, 93.4kB/s]
     17%|#6        | 6.85M/41.5M [01:32<08:23, 72.1kB/s]
     17%|#6        | 6.88M/41.5M [01:32<06:26, 93.8kB/s]
     17%|#6        | 6.89M/41.5M [01:32<07:22, 82.1kB/s]
     17%|#6        | 6.91M/41.5M [01:33<07:07, 84.8kB/s]
     17%|#6        | 6.93M/41.5M [01:33<06:51, 88.0kB/s]
     17%|#6        | 6.95M/41.5M [01:33<06:46, 89.2kB/s]
     17%|#6        | 6.96M/41.5M [01:33<06:41, 90.1kB/s]
     17%|#6        | 6.98M/41.5M [01:33<06:38, 90.9kB/s]
     17%|#6        | 6.99M/41.5M [01:34<06:35, 91.4kB/s]
     17%|#6        | 7.02M/41.5M [01:34<07:22, 81.6kB/s]
     17%|#6        | 7.05M/41.5M [01:34<05:38, 107kB/s] 
     17%|#7        | 7.06M/41.5M [01:34<05:50, 103kB/s]
     17%|#7        | 7.08M/41.5M [01:34<05:59, 100kB/s]
     17%|#7        | 7.09M/41.5M [01:35<06:07, 98.2kB/s]
     17%|#7        | 7.11M/41.5M [01:35<07:59, 75.2kB/s]
     17%|#7        | 7.13M/41.5M [01:35<06:37, 90.6kB/s]
  
    17%|#7        | 7.15M/41.5M [01:35<06:35, 91.1kB/s]
     17%|#7        | 7.16M/41.5M [01:35<06:33, 91.5kB/s]
     17%|#7        | 7.18M/41.5M [01:36<06:31, 91.9kB/s]
     17%|#7        | 7.20M/41.5M [01:36<06:30, 92.1kB/s]
     17%|#7        | 7.21M/41.5M [01:36<06:29, 92.3kB/s]
     17%|#7        | 7.23M/41.5M [01:36<06:28, 92.5kB/s]
     17%|#7        | 7.24M/41.5M [01:36<06:04, 98.4kB/s]
     18%|#7        | 7.27M/41.5M [01:37<05:43, 104kB/s] 
     18%|#7        | 7.28M/41.5M [01:37<05:55, 101kB/s]
     18%|#7        | 7.30M/41.5M [01:37<07:51, 76.1kB/s]
     18%|#7        | 7.33M/41.5M [01:37<05:31, 108kB/s] 
     18%|#7        | 7.34M/41.5M [01:37<06:03, 98.6kB/s]
     18%|#7        | 7.36M/41.5M [01:38<07:27, 80.0kB/s]
     18%|#7        | 7.38M/41.5M [01:38<07:31, 79.2kB/s]
     18%|#7        | 7.39M/41.5M [01:38<08:59, 66.2kB/s]
     18%|#7        | 7.41M/41.5M [01:38<08:15, 72.1kB/s]
     18%|#7        | 7.41M/41.5M [01:39<11:12, 53.1kB/s]
     18%|#7        | 7.44M/41.5
 M [01:39<08:17, 71.7kB/s]
     18%|#7        | 7.45M/41.5M [01:39<09:35, 62.1kB/s]
     18%|#7        | 7.46M/41.5M [01:40<10:07, 58.7kB/s]
     18%|#8        | 7.48M/41.5M [01:40<09:25, 63.1kB/s]
     18%|#8        | 7.48M/41.5M [01:40<09:28, 62.7kB/s]
     18%|#8        | 7.49M/41.5M [01:40<10:10, 58.4kB/s]
     18%|#8        | 7.51M/41.5M [01:40<08:44, 67.9kB/s]
     18%|#8        | 7.52M/41.5M [01:40<09:36, 61.8kB/s]
     18%|#8        | 7.53M/41.5M [01:41<08:23, 70.7kB/s]
     18%|#8        | 7.55M/41.5M [01:41<07:41, 77.1kB/s]
     18%|#8        | 7.56M/41.5M [01:41<07:15, 81.7kB/s]
     18%|#8        | 7.58M/41.5M [01:41<07:27, 79.4kB/s]
     18%|#8        | 7.59M/41.5M [01:41<07:53, 75.1kB/s]
     18%|#8        | 7.59M/41.5M [01:42<09:34, 61.9kB/s]
     18%|#8        | 7.62M/41.5M [01:42<06:34, 90.1kB/s]
     18%|#8        | 7.63M/41.5M [01:42<06:30, 90.9kB/s]
     18%|#8        | 7.65M/41.5M [01:42<08:25, 70.2kB/s]
     18%|#8        | 7.67M/41.5M [01:42<06:45, 87.4kB/s]
  
    19%|#8        | 7.69M/41.5M [01:43<08:24, 70.3kB/s]
     19%|#8        | 7.70M/41.5M [01:43<07:49, 75.5kB/s]
     19%|#8        | 7.72M/41.5M [01:43<09:13, 64.0kB/s]
     19%|#8        | 7.73M/41.5M [01:43<08:23, 70.3kB/s]
     19%|#8        | 7.74M/41.5M [01:44<09:07, 64.6kB/s]
     19%|#8        | 7.76M/41.5M [01:44<14:48, 39.8kB/s]
     19%|#8        | 7.79M/41.5M [01:44<08:57, 65.7kB/s]
     19%|#8        | 7.80M/41.5M [01:45<08:17, 71.1kB/s]
     19%|#8        | 7.82M/41.5M [01:45<11:10, 52.7kB/s]
     19%|#8        | 7.84M/41.5M [01:45<08:37, 68.2kB/s]
     19%|#8        | 7.86M/41.5M [01:46<09:40, 60.8kB/s]
     19%|#8        | 7.88M/41.5M [01:46<08:45, 67.0kB/s]
     19%|#9        | 7.89M/41.5M [01:46<08:05, 72.6kB/s]
     19%|#9        | 7.91M/41.5M [01:46<07:59, 73.4kB/s]
     19%|#9        | 7.92M/41.5M [01:47<08:54, 65.8kB/s]
     19%|#9        | 7.94M/41.5M [01:47<08:09, 71.9kB/s]
     19%|#9        | 7.95M/41.5M [01:47<07:36, 77.0kB/s]
     19%|#9        | 7.97M/41.
 5M [01:47<07:13, 81.1kB/s]
     19%|#9        | 7.98M/41.5M [01:47<08:50, 66.2kB/s]
     19%|#9        | 8.00M/41.5M [01:48<08:04, 72.4kB/s]
     19%|#9        | 8.01M/41.5M [01:48<08:52, 65.9kB/s]
     19%|#9        | 8.02M/41.5M [01:48<08:01, 72.9kB/s]
     19%|#9        | 8.04M/41.5M [01:48<07:27, 78.4kB/s]
     19%|#9        | 8.05M/41.5M [01:49<09:04, 64.3kB/s]
     19%|#9        | 8.08M/41.5M [01:49<07:30, 77.8kB/s]
     20%|#9        | 8.09M/41.5M [01:49<08:30, 68.6kB/s]
     20%|#9        | 8.11M/41.5M [01:49<10:31, 55.4kB/s]
     20%|#9        | 8.14M/41.5M [01:50<07:56, 73.4kB/s]
     20%|#9        | 8.16M/41.5M [01:50<07:54, 73.7kB/s]
     20%|#9        | 8.16M/41.5M [01:50<08:08, 71.5kB/s]
     20%|#9        | 8.18M/41.5M [01:50<07:35, 76.7kB/s]
     20%|#9        | 8.20M/41.5M [01:51<07:38, 76.2kB/s]
     20%|#9        | 8.20M/41.5M [01:51<11:14, 51.7kB/s]
     20%|#9        | 8.23M/41.5M [01:52<10:53, 53.4kB/s]
     20%|#9        | 8.26M/41.5M [01:52<08:32, 68.0kB/s]
 
     20%|#9        | 8.27M/41.5M [01:52<08:12, 70.7kB/s]
     20%|#9        | 8.29M/41.5M [01:52<09:20, 62.1kB/s]
     20%|#9        | 8.30M/41.5M [01:52<09:50, 59.0kB/s]
     20%|##        | 8.30M/41.5M [01:53<09:57, 58.2kB/s]
     20%|##        | 8.32M/41.5M [01:53<09:01, 64.2kB/s]
     20%|##        | 8.33M/41.5M [01:53<09:18, 62.3kB/s]
     20%|##        | 8.34M/41.5M [01:54<17:56, 32.3kB/s]
     20%|##        | 8.39M/41.5M [01:54<08:07, 71.2kB/s]
     20%|##        | 8.41M/41.5M [01:54<07:41, 75.1kB/s]
     20%|##        | 8.42M/41.5M [01:55<08:51, 65.3kB/s]
     20%|##        | 8.44M/41.5M [01:55<08:10, 70.7kB/s]
     20%|##        | 8.45M/41.5M [01:55<07:38, 75.5kB/s]
     20%|##        | 8.47M/41.5M [01:55<07:14, 79.6kB/s]
     20%|##        | 8.48M/41.5M [01:55<06:57, 82.9kB/s]
     20%|##        | 8.50M/41.5M [01:55<06:44, 85.6kB/s]
     21%|##        | 8.52M/41.5M [01:56<06:34, 87.5kB/s]
     21%|##        | 8.53M/41.5M [01:56<06:28, 89.0kB/s]
     21%|##        | 8.55M/41
 .5M [01:56<06:23, 90.1kB/s]
     21%|##        | 8.56M/41.5M [01:56<06:19, 90.9kB/s]
     21%|##        | 8.58M/41.5M [01:56<06:17, 91.4kB/s]
     21%|##        | 8.59M/41.5M [01:56<06:15, 91.8kB/s]
     21%|##        | 8.61M/41.5M [01:57<06:14, 92.1kB/s]
     21%|##        | 8.62M/41.5M [01:57<08:05, 71.0kB/s]
     21%|##        | 8.65M/41.5M [01:57<06:32, 87.8kB/s]
     21%|##        | 8.66M/41.5M [01:57<06:26, 89.1kB/s]
     21%|##        | 8.68M/41.5M [01:58<06:21, 90.1kB/s]
     21%|##        | 8.70M/41.5M [01:58<05:31, 104kB/s] 
     21%|##1       | 8.72M/41.5M [01:58<07:42, 74.3kB/s]
     21%|##1       | 8.76M/41.5M [01:58<04:52, 117kB/s] 
     21%|##1       | 8.77M/41.5M [01:58<05:09, 111kB/s]
     21%|##1       | 8.79M/41.5M [01:59<05:23, 106kB/s]
     21%|##1       | 8.80M/41.5M [01:59<05:35, 102kB/s]
     21%|##1       | 8.82M/41.5M [01:59<05:43, 99.6kB/s]
     21%|##1       | 8.84M/41.5M [01:59<05:50, 97.6kB/s]
     21%|##1       | 8.86M/41.5M [01:59<05:11, 110kB/s] 
   
   21%|##1       | 8.88M/41.5M [01:59<05:26, 105kB/s]
     21%|##1       | 8.90M/41.5M [02:00<04:57, 115kB/s]
     21%|##1       | 8.91M/41.5M [02:00<08:23, 67.8kB/s]
     22%|##1       | 8.97M/41.5M [02:00<05:07, 111kB/s] 
     22%|##1       | 8.98M/41.5M [02:01<06:27, 88.0kB/s]
     22%|##1       | 9.01M/41.5M [02:01<05:46, 98.3kB/s]
     22%|##1       | 9.02M/41.5M [02:01<05:50, 97.1kB/s]
     22%|##1       | 9.04M/41.5M [02:01<05:54, 96.0kB/s]
     22%|##1       | 9.05M/41.5M [02:02<05:57, 95.2kB/s]
     22%|##1       | 9.07M/41.5M [02:02<05:59, 94.5kB/s]
     22%|##1       | 9.09M/41.5M [02:02<06:01, 94.0kB/s]
     22%|##1       | 9.10M/41.5M [02:02<06:02, 93.7kB/s]
     22%|##1       | 9.12M/41.5M [02:02<06:03, 93.4kB/s]
     22%|##2       | 9.13M/41.5M [02:02<06:03, 93.2kB/s]
     22%|##2       | 9.15M/41.5M [02:03<06:04, 93.1kB/s]
     22%|##2       | 9.16M/41.5M [02:03<06:04, 93.0kB/s]
     22%|##2       | 9.18M/41.5M [02:03<05:42, 99.0kB/s]
     22%|##2       | 9.20M/41.5M 
 [02:03<05:48, 97.0kB/s]
     22%|##2       | 9.21M/41.5M [02:03<08:05, 69.7kB/s]
     22%|##2       | 9.25M/41.5M [02:04<04:55, 115kB/s] 
     22%|##2       | 9.27M/41.5M [02:04<05:10, 109kB/s]
     22%|##2       | 9.29M/41.5M [02:04<04:48, 117kB/s]
     22%|##2       | 9.30M/41.5M [02:04<05:06, 110kB/s]
     22%|##2       | 9.33M/41.5M [02:04<04:44, 119kB/s]
     23%|##2       | 9.34M/41.5M [02:04<05:03, 111kB/s]
     23%|##2       | 9.37M/41.5M [02:05<04:42, 119kB/s]
     23%|##2       | 9.39M/41.5M [02:05<04:29, 125kB/s]
     23%|##2       | 9.41M/41.5M [02:05<04:51, 116kB/s]
     23%|##2       | 9.43M/41.5M [02:05<04:34, 123kB/s]
     23%|##2       | 9.45M/41.5M [02:05<04:23, 128kB/s]
     23%|##2       | 9.47M/41.5M [02:06<06:51, 81.7kB/s]
     23%|##2       | 9.52M/41.5M [02:06<04:09, 134kB/s] 
     23%|##2       | 9.54M/41.5M [02:06<04:57, 112kB/s]
     23%|##3       | 9.55M/41.5M [02:06<05:10, 108kB/s]
     23%|##3       | 9.58M/41.5M [02:07<07:14, 77.0kB/s]
     23%|##3    
    | 9.63M/41.5M [02:07<04:38, 120kB/s] 
     23%|##3       | 9.65M/41.5M [02:07<04:39, 119kB/s]
     23%|##3       | 9.66M/41.5M [02:07<04:55, 113kB/s]
     23%|##3       | 9.68M/41.5M [02:08<05:09, 108kB/s]
     23%|##3       | 9.70M/41.5M [02:08<05:21, 104kB/s]
     23%|##3       | 9.72M/41.5M [02:08<05:09, 108kB/s]
     23%|##3       | 9.73M/41.5M [02:08<05:21, 104kB/s]
     23%|##3       | 9.75M/41.5M [02:08<05:12, 107kB/s]
     24%|##3       | 9.77M/41.5M [02:09<05:03, 110kB/s]
     24%|##3       | 9.79M/41.5M [02:09<04:58, 111kB/s]
     24%|##3       | 9.80M/41.5M [02:09<07:09, 77.4kB/s]
     24%|##3       | 9.84M/41.5M [02:09<05:05, 108kB/s] 
     24%|##3       | 9.85M/41.5M [02:09<05:18, 104kB/s]
     24%|##3       | 9.87M/41.5M [02:10<05:27, 101kB/s]
     24%|##3       | 9.89M/41.5M [02:10<04:56, 112kB/s]
     24%|##3       | 9.91M/41.5M [02:10<05:11, 106kB/s]
     24%|##3       | 9.92M/41.5M [02:10<06:57, 79.2kB/s]
     24%|##4       | 9.96M/41.5M [02:11<04:57, 111kB/s] 
      24%|##4       | 9.98M/41.5M [02:11<05:10, 107kB/s]
     24%|##4       | 10.0M/41.5M [02:11<04:46, 115kB/s]
     24%|##4       | 10.0M/41.5M [02:11<06:25, 85.5kB/s]
     24%|##4       | 10.0M/41.5M [02:11<05:02, 109kB/s] 
     24%|##4       | 10.1M/41.5M [02:12<05:14, 105kB/s]
     24%|##4       | 10.1M/41.5M [02:12<05:23, 102kB/s]
     24%|##4       | 10.1M/41.5M [02:12<08:33, 64.1kB/s]
     24%|##4       | 10.1M/41.5M [02:12<05:35, 97.9kB/s]
     24%|##4       | 10.1M/41.5M [02:13<06:56, 79.0kB/s]
     24%|##4       | 10.2M/41.5M [02:13<06:40, 81.9kB/s]
     25%|##4       | 10.2M/41.5M [02:13<06:29, 84.4kB/s]
     25%|##4       | 10.2M/41.5M [02:14<07:52, 69.4kB/s]
     25%|##4       | 10.2M/41.5M [02:14<10:58, 49.8kB/s]
     25%|##4       | 10.2M/41.5M [02:14<08:00, 68.2kB/s]
     25%|##4       | 10.2M/41.5M [02:15<08:12, 66.6kB/s]
     25%|##4       | 10.3M/41.5M [02:15<08:23, 65.1kB/s]
     25%|##4       | 10.3M/41.5M [02:15<11:46, 46.3kB/s]
     25%|##4       | 10.3M/41.5M
  [02:16<08:30, 64.0kB/s]
     25%|##4       | 10.3M/41.5M [02:16<07:52, 69.1kB/s]
     25%|##4       | 10.3M/41.5M [02:16<08:51, 61.4kB/s]
     25%|##4       | 10.4M/41.5M [02:16<08:03, 67.5kB/s]
     25%|##5       | 10.4M/41.5M [02:17<07:26, 73.0kB/s]
     25%|##5       | 10.4M/41.5M [02:17<10:33, 51.5kB/s]
     25%|##5       | 10.4M/41.5M [02:17<06:57, 78.0kB/s]
     25%|##5       | 10.4M/41.5M [02:18<08:17, 65.4kB/s]
     25%|##5       | 10.5M/41.5M [02:18<08:58, 60.4kB/s]
     25%|##5       | 10.5M/41.5M [02:18<09:23, 57.7kB/s]
     25%|##5       | 10.5M/41.5M [02:18<09:48, 55.3kB/s]
     25%|##5       | 10.5M/41.5M [02:18<10:11, 53.2kB/s]
     25%|##5       | 10.5M/41.5M [02:19<08:34, 63.1kB/s]
     25%|##5       | 10.5M/41.5M [02:19<09:13, 58.7kB/s]
     25%|##5       | 10.5M/41.5M [02:19<09:47, 55.3kB/s]
     25%|##5       | 10.5M/41.5M [02:19<08:13, 65.9kB/s]
     25%|##5       | 10.5M/41.5M [02:19<08:58, 60.3kB/s]
     25%|##5       | 10.5M/41.5M [02:20<07:45, 69.7kB/s]
   
   25%|##5       | 10.6M/41.5M [02:20<08:35, 62.9kB/s]
     25%|##5       | 10.6M/41.5M [02:20<07:32, 71.7kB/s]
     26%|##5       | 10.6M/41.5M [02:20<08:59, 60.1kB/s]
     26%|##5       | 10.6M/41.5M [02:20<06:46, 79.6kB/s]
     26%|##5       | 10.6M/41.5M [02:21<06:29, 83.0kB/s]
     26%|##5       | 10.6M/41.5M [02:21<08:00, 67.4kB/s]
     26%|##5       | 10.7M/41.5M [02:21<07:21, 73.3kB/s]
     26%|##5       | 10.7M/41.5M [02:21<06:53, 78.2kB/s]
     26%|##5       | 10.7M/41.5M [02:22<08:18, 64.8kB/s]
     26%|##5       | 10.7M/41.5M [02:22<06:34, 81.9kB/s]
     26%|##5       | 10.7M/41.5M [02:22<09:51, 54.5kB/s]
     26%|##5       | 10.8M/41.5M [02:23<07:39, 70.1kB/s]
     26%|##5       | 10.8M/41.5M [02:23<08:40, 61.9kB/s]
     26%|##5       | 10.8M/41.5M [02:23<08:41, 61.7kB/s]
     26%|##6       | 10.8M/41.5M [02:23<08:13, 65.3kB/s]
     26%|##6       | 10.8M/41.5M [02:23<08:19, 64.4kB/s]
     26%|##6       | 10.8M/41.5M [02:24<07:55, 67.7kB/s]
     26%|##6       | 10.8M/41.5
 M [02:24<08:05, 66.2kB/s]
     26%|##6       | 10.8M/41.5M [02:24<07:45, 69.1kB/s]
     26%|##6       | 10.8M/41.5M [02:24<07:58, 67.2kB/s]
     26%|##6       | 10.9M/41.5M [02:24<07:09, 74.8kB/s]
     26%|##6       | 10.9M/41.5M [02:24<06:40, 80.1kB/s]
     26%|##6       | 10.9M/41.5M [02:25<06:22, 83.9kB/s]
     26%|##6       | 10.9M/41.5M [02:25<06:10, 86.6kB/s]
     26%|##6       | 10.9M/41.5M [02:25<05:35, 95.5kB/s]
     26%|##6       | 10.9M/41.5M [02:25<06:51, 77.8kB/s]
     26%|##6       | 11.0M/41.5M [02:26<06:03, 88.0kB/s]
     26%|##6       | 11.0M/41.5M [02:26<07:29, 71.2kB/s]
     27%|##6       | 11.0M/41.5M [02:26<06:09, 86.4kB/s]
     27%|##6       | 11.0M/41.5M [02:26<07:31, 70.8kB/s]
     27%|##6       | 11.0M/41.5M [02:27<08:07, 65.5kB/s]
     27%|##6       | 11.0M/41.5M [02:27<08:43, 60.9kB/s]
     27%|##6       | 11.0M/41.5M [02:27<09:17, 57.3kB/s]
     27%|##6       | 11.1M/41.5M [02:27<07:58, 66.7kB/s]
     27%|##6       | 11.1M/41.5M [02:27<08:42, 61.1kB/s]
  
    27%|##6       | 11.1M/41.5M [02:28<09:19, 57.0kB/s]
     27%|##6       | 11.1M/41.5M [02:28<10:10, 52.2kB/s]
     27%|##6       | 11.1M/41.5M [02:28<08:33, 62.0kB/s]
     27%|##6       | 11.1M/41.5M [02:28<09:09, 58.0kB/s]
     27%|##6       | 11.1M/41.5M [02:28<09:40, 54.9kB/s]
     27%|##6       | 11.1M/41.5M [02:29<08:06, 65.4kB/s]
     27%|##6       | 11.1M/41.5M [02:29<08:50, 60.0kB/s]
     27%|##6       | 11.2M/41.5M [02:29<07:38, 69.4kB/s]
     27%|##6       | 11.2M/41.5M [02:29<08:26, 62.7kB/s]
     27%|##6       | 11.2M/41.5M [02:29<09:08, 57.9kB/s]
     27%|##6       | 11.2M/41.5M [02:29<07:45, 68.2kB/s]
     27%|##7       | 11.2M/41.5M [02:30<07:00, 75.5kB/s]
     27%|##7       | 11.2M/41.5M [02:30<06:33, 80.7kB/s]
     27%|##7       | 11.2M/41.5M [02:30<06:16, 84.3kB/s]
     27%|##7       | 11.3M/41.5M [02:30<06:05, 86.8kB/s]
     27%|##7       | 11.3M/41.5M [02:30<05:57, 88.6kB/s]
     27%|##7       | 11.3M/41.5M [02:31<05:52, 89.8kB/s]
     27%|##7       | 11.3M/41.
 5M [02:31<05:48, 90.7kB/s]
     27%|##7       | 11.3M/41.5M [02:31<05:46, 91.3kB/s]
     27%|##7       | 11.3M/41.5M [02:31<05:44, 91.7kB/s]
     27%|##7       | 11.4M/41.5M [02:31<04:58, 106kB/s] 
     27%|##7       | 11.4M/41.5M [02:31<05:09, 102kB/s]
     27%|##7       | 11.4M/41.5M [02:32<05:18, 99.2kB/s]
     28%|##7       | 11.4M/41.5M [02:32<06:08, 85.5kB/s]
     28%|##7       | 11.4M/41.5M [02:32<05:19, 98.7kB/s]
     28%|##7       | 11.5M/41.5M [02:32<05:24, 97.0kB/s]
     28%|##7       | 11.5M/41.5M [02:33<06:57, 75.5kB/s]
     28%|##7       | 11.5M/41.5M [02:33<06:36, 79.3kB/s]
     28%|##7       | 11.5M/41.5M [02:33<06:20, 82.7kB/s]
     28%|##7       | 11.5M/41.5M [02:34<09:21, 56.0kB/s]
     28%|##7       | 11.5M/41.5M [02:34<07:14, 72.3kB/s]
     28%|##7       | 11.6M/41.5M [02:34<07:10, 72.9kB/s]
     28%|##7       | 11.6M/41.5M [02:34<07:55, 66.0kB/s]
     28%|##7       | 11.6M/41.5M [02:34<07:16, 71.8kB/s]
     28%|##7       | 11.6M/41.5M [02:35<06:48, 76.8kB/s]
  
    28%|##8       | 11.6M/41.5M [02:35<06:51, 76.2kB/s]
     28%|##8       | 11.6M/41.5M [02:35<07:08, 73.1kB/s]
     28%|##8       | 11.6M/41.5M [02:35<06:38, 78.6kB/s]
     28%|##8       | 11.7M/41.5M [02:35<06:18, 82.6kB/s]
     28%|##8       | 11.7M/41.5M [02:35<06:05, 85.6kB/s]
     28%|##8       | 11.7M/41.5M [02:36<05:56, 87.7kB/s]
     28%|##8       | 11.7M/41.5M [02:36<05:03, 103kB/s] 
     28%|##8       | 11.7M/41.5M [02:36<07:07, 73.0kB/s]
     28%|##8       | 11.8M/41.5M [02:36<04:55, 105kB/s] 
     28%|##8       | 11.8M/41.5M [02:37<05:24, 96.0kB/s]
     28%|##8       | 11.8M/41.5M [02:37<05:27, 95.2kB/s]
     28%|##8       | 11.8M/41.5M [02:37<06:37, 78.3kB/s]
     29%|##8       | 11.8M/41.5M [02:37<05:34, 93.1kB/s]
     29%|##8       | 11.8M/41.5M [02:38<08:26, 61.4kB/s]
     29%|##8       | 11.9M/41.5M [02:38<06:25, 80.6kB/s]
     29%|##8       | 11.9M/41.5M [02:38<06:28, 79.9kB/s]
     29%|##8       | 11.9M/41.5M [02:38<06:14, 82.7kB/s]
     29%|##8       | 11.9M/41.
 5M [02:39<06:04, 85.2kB/s]
     29%|##8       | 11.9M/41.5M [02:39<07:08, 72.2kB/s]
     29%|##8       | 12.0M/41.5M [02:39<07:00, 73.7kB/s]
     29%|##8       | 12.0M/41.5M [02:39<06:35, 78.3kB/s]
     29%|##8       | 12.0M/41.5M [02:39<06:17, 82.0kB/s]
     29%|##8       | 12.0M/41.5M [02:40<07:42, 66.9kB/s]
     29%|##8       | 12.0M/41.5M [02:40<05:52, 87.5kB/s]
     29%|##9       | 12.0M/41.5M [02:40<07:16, 70.7kB/s]
     29%|##9       | 12.1M/41.5M [02:40<06:47, 75.7kB/s]
     29%|##9       | 12.1M/41.5M [02:41<06:44, 76.2kB/s]
     29%|##9       | 12.1M/41.5M [02:41<07:40, 66.9kB/s]
     29%|##9       | 12.1M/41.5M [02:41<07:03, 72.8kB/s]
     29%|##9       | 12.1M/41.5M [02:41<07:44, 66.3kB/s]
     29%|##9       | 12.1M/41.5M [02:42<07:00, 73.2kB/s]
     29%|##9       | 12.1M/41.5M [02:42<06:32, 78.5kB/s]
     29%|##9       | 12.1M/41.5M [02:42<09:25, 54.4kB/s]
     29%|##9       | 12.2M/41.5M [02:42<05:59, 85.5kB/s]
     29%|##9       | 12.2M/41.5M [02:43<08:49, 58.0kB/s]
 
     29%|##9       | 12.2M/41.5M [02:43<06:12, 82.5kB/s]
     30%|##9       | 12.2M/41.5M [02:43<06:40, 76.6kB/s]
     30%|##9       | 12.3M/41.5M [02:44<07:05, 72.1kB/s]
     30%|##9       | 12.3M/41.5M [02:44<08:30, 60.0kB/s]
     30%|##9       | 12.3M/41.5M [02:44<07:17, 70.0kB/s]
     30%|##9       | 12.3M/41.5M [02:44<06:47, 75.2kB/s]
     30%|##9       | 12.3M/41.5M [02:45<07:59, 63.8kB/s]
     30%|##9       | 12.3M/41.5M [02:45<08:51, 57.5kB/s]
     30%|##9       | 12.3M/41.5M [02:45<09:12, 55.3kB/s]
     30%|##9       | 12.4M/41.5M [02:45<07:58, 63.8kB/s]
     30%|##9       | 12.4M/41.5M [02:45<08:32, 59.5kB/s]
     30%|##9       | 12.4M/41.5M [02:46<07:27, 68.1kB/s]
     30%|##9       | 12.4M/41.5M [02:46<06:48, 74.6kB/s]
     30%|##9       | 12.4M/41.5M [02:46<07:35, 66.9kB/s]
     30%|##9       | 12.4M/41.5M [02:46<06:51, 74.1kB/s]
     30%|##9       | 12.4M/41.5M [02:47<08:13, 61.7kB/s]
     30%|###       | 12.5M/41.5M [02:47<06:40, 76.0kB/s]
     30%|###       | 12.5M/41
 .5M [02:47<06:56, 73.1kB/s]
     30%|###       | 12.5M/41.5M [02:47<07:42, 65.8kB/s]
     30%|###       | 12.5M/41.5M [02:47<08:50, 57.3kB/s]
     30%|###       | 12.5M/41.5M [02:48<06:35, 76.8kB/s]
     30%|###       | 12.5M/41.5M [02:48<06:39, 76.1kB/s]
     30%|###       | 12.5M/41.5M [02:48<06:55, 73.1kB/s]
     30%|###       | 12.5M/41.5M [02:48<07:42, 65.6kB/s]
     30%|###       | 12.6M/41.5M [02:48<06:53, 73.4kB/s]
     30%|###       | 12.6M/41.5M [02:48<06:23, 79.0kB/s]
     30%|###       | 12.6M/41.5M [02:49<08:18, 60.8kB/s]
     30%|###       | 12.6M/41.5M [02:49<06:00, 83.9kB/s]
     30%|###       | 12.6M/41.5M [02:49<06:35, 76.4kB/s]
     30%|###       | 12.6M/41.5M [02:49<06:15, 80.5kB/s]
     31%|###       | 12.7M/41.5M [02:50<06:49, 73.8kB/s]
     31%|###       | 12.7M/41.5M [02:50<06:24, 78.5kB/s]
     31%|###       | 12.7M/41.5M [02:50<06:07, 82.3kB/s]
     31%|###       | 12.7M/41.5M [02:50<06:17, 80.0kB/s]
     31%|###       | 12.7M/41.5M [02:50<06:26, 78.1kB/s]
      31%|###       | 12.7M/41.5M [02:51<05:43, 87.9kB/s]
     31%|###       | 12.8M/41.5M [02:51<06:02, 83.1kB/s]
     31%|###       | 12.8M/41.5M [02:51<05:50, 85.8kB/s]
     31%|###       | 12.8M/41.5M [02:51<05:42, 87.8kB/s]
     31%|###       | 12.8M/41.5M [02:51<05:12, 96.3kB/s]
     31%|###       | 12.8M/41.5M [02:51<05:15, 95.2kB/s]
     31%|###       | 12.8M/41.5M [02:52<05:43, 87.4kB/s]
     31%|###       | 12.9M/41.5M [02:52<04:53, 102kB/s] 
     31%|###1      | 12.9M/41.5M [02:52<05:21, 93.3kB/s]
     31%|###1      | 12.9M/41.5M [02:52<06:52, 72.7kB/s]
     31%|###1      | 12.9M/41.5M [02:53<05:37, 88.7kB/s]
     31%|###1      | 12.9M/41.5M [02:53<05:34, 89.6kB/s]
     31%|###1      | 13.0M/41.5M [02:53<06:54, 72.3kB/s]
     31%|###1      | 13.0M/41.5M [02:54<08:18, 60.0kB/s]
     31%|###1      | 13.0M/41.5M [02:54<07:29, 66.5kB/s]
     31%|###1      | 13.0M/41.5M [02:54<08:01, 62.1kB/s]
     31%|###1      | 13.0M/41.5M [02:54<08:32, 58.3kB/s]
     31%|###1      | 13.0M/4
 1.5M [02:54<08:25, 59.1kB/s]
     31%|###1      | 13.0M/41.5M [02:55<08:57, 55.6kB/s]
     31%|###1      | 13.0M/41.5M [02:55<09:23, 52.9kB/s]
     31%|###1      | 13.0M/41.5M [02:55<12:34, 39.6kB/s]
     31%|###1      | 13.0M/41.5M [02:55<09:28, 52.5kB/s]
     31%|###1      | 13.1M/41.5M [02:55<09:45, 50.9kB/s]
     32%|###1      | 13.1M/41.5M [02:56<08:33, 58.1kB/s]
     32%|###1      | 13.1M/41.5M [02:56<09:01, 55.0kB/s]
     32%|###1      | 13.1M/41.5M [02:56<08:45, 56.7kB/s]
     32%|###1      | 13.1M/41.5M [02:56<08:25, 58.8kB/s]
     32%|###1      | 13.1M/41.5M [02:56<08:23, 59.1kB/s]
     32%|###1      | 13.1M/41.5M [02:56<08:16, 60.0kB/s]
     32%|###1      | 13.1M/41.5M [02:57<08:22, 59.1kB/s]
     32%|###1      | 13.1M/41.5M [02:57<07:05, 69.9kB/s]
     32%|###1      | 13.1M/41.5M [02:57<07:06, 69.7kB/s]
     32%|###1      | 13.2M/41.5M [02:57<07:05, 69.8kB/s]
     32%|###1      | 13.2M/41.5M [02:57<08:57, 55.3kB/s]
     32%|###1      | 13.2M/41.5M [02:58<09:19, 53.0kB/s]
 
     32%|###1      | 13.2M/41.5M [02:58<10:33, 46.8kB/s]
     32%|###1      | 13.2M/41.5M [02:58<07:25, 66.5kB/s]
     32%|###1      | 13.2M/41.5M [02:58<07:59, 61.8kB/s]
     32%|###1      | 13.2M/41.5M [02:59<08:31, 58.0kB/s]
     32%|###1      | 13.3M/41.5M [02:59<07:21, 67.0kB/s]
     32%|###1      | 13.3M/41.5M [02:59<12:25, 39.7kB/s]
     32%|###2      | 13.3M/41.5M [03:00<08:15, 59.7kB/s]
     32%|###2      | 13.3M/41.5M [03:00<08:40, 56.7kB/s]
     32%|###2      | 13.3M/41.5M [03:00<09:04, 54.2kB/s]
     32%|###2      | 13.3M/41.5M [03:00<07:40, 64.1kB/s]
     32%|###2      | 13.3M/41.5M [03:00<08:17, 59.4kB/s]
     32%|###2      | 13.3M/41.5M [03:01<09:10, 53.6kB/s]
     32%|###2      | 13.4M/41.5M [03:01<07:48, 62.9kB/s]
     32%|###2      | 13.4M/41.5M [03:01<08:22, 58.7kB/s]
     32%|###2      | 13.4M/41.5M [03:01<08:51, 55.5kB/s]
     32%|###2      | 13.4M/41.5M [03:02<11:34, 42.4kB/s]
     32%|###2      | 13.4M/41.5M [03:02<07:55, 62.0kB/s]
     32%|###2      | 13.4M/
 41.5M [03:02<08:23, 58.5kB/s]
     32%|###2      | 13.4M/41.5M [03:02<07:20, 66.8kB/s]
     32%|###2      | 13.4M/41.5M [03:02<07:57, 61.5kB/s]
     32%|###2      | 13.5M/41.5M [03:03<10:46, 45.5kB/s]
     33%|###2      | 13.5M/41.5M [03:03<07:37, 64.1kB/s]
     33%|###2      | 13.5M/41.5M [03:03<08:07, 60.2kB/s]
     33%|###2      | 13.5M/41.5M [03:03<08:36, 56.9kB/s]
     33%|###2      | 13.5M/41.5M [03:04<07:24, 66.0kB/s]
     33%|###2      | 13.5M/41.5M [03:04<10:12, 47.9kB/s]
     33%|###2      | 13.5M/41.5M [03:04<07:04, 69.0kB/s]
     33%|###2      | 13.6M/41.5M [03:04<07:41, 63.4kB/s]
     33%|###2      | 13.6M/41.5M [03:04<08:16, 58.9kB/s]
     33%|###2      | 13.6M/41.5M [03:05<08:47, 55.5kB/s]
     33%|###2      | 13.6M/41.5M [03:05<09:12, 53.0kB/s]
     33%|###2      | 13.6M/41.5M [03:05<07:34, 64.3kB/s]
     33%|###2      | 13.6M/41.5M [03:05<08:14, 59.1kB/s]
     33%|###2      | 13.6M/41.5M [03:05<08:47, 55.4kB/s]
     33%|###2      | 13.6M/41.5M [03:06<07:19, 66.4kB/s
 ]
     33%|###2      | 13.6M/41.5M [03:06<08:03, 60.5kB/s]
     33%|###2      | 13.6M/41.5M [03:06<06:56, 70.1kB/s]
     33%|###2      | 13.7M/41.5M [03:06<06:19, 76.8kB/s]
     33%|###2      | 13.7M/41.5M [03:06<07:44, 62.8kB/s]
     33%|###3      | 13.7M/41.5M [03:07<05:56, 81.7kB/s]
     33%|###3      | 13.7M/41.5M [03:07<05:44, 84.6kB/s]
     33%|###3      | 13.7M/41.5M [03:07<05:35, 86.8kB/s]
     33%|###3      | 13.8M/41.5M [03:07<05:28, 88.5kB/s]
     33%|###3      | 13.8M/41.5M [03:07<05:24, 89.7kB/s]
     33%|###3      | 13.8M/41.5M [03:07<05:20, 90.6kB/s]
     33%|###3      | 13.8M/41.5M [03:08<05:18, 91.2kB/s]
     33%|###3      | 13.8M/41.5M [03:08<05:16, 91.7kB/s]
     33%|###3      | 13.8M/41.5M [03:08<05:15, 92.0kB/s]
     33%|###3      | 13.8M/41.5M [03:08<05:14, 92.2kB/s]
     33%|###3      | 13.9M/41.5M [03:08<05:13, 92.4kB/s]
     33%|###3      | 13.9M/41.5M [03:09<06:46, 71.2kB/s]
     33%|###3      | 13.9M/41.5M [03:09<05:29, 87.9kB/s]
     34%|###3      | 13.9M
 /41.5M [03:09<05:24, 89.2kB/s]
     34%|###3      | 13.9M/41.5M [03:09<05:20, 90.1kB/s]
     34%|###3      | 13.9M/41.5M [03:09<05:17, 90.9kB/s]
     34%|###3      | 14.0M/41.5M [03:10<05:15, 91.4kB/s]
     34%|###3      | 14.0M/41.5M [03:10<05:35, 85.9kB/s]
     34%|###3      | 14.0M/41.5M [03:10<05:07, 93.9kB/s]
     34%|###3      | 14.0M/41.5M [03:10<05:08, 93.6kB/s]
     34%|###3      | 14.0M/41.5M [03:10<05:08, 93.3kB/s]
     34%|###3      | 14.0M/41.5M [03:10<05:09, 93.1kB/s]
     34%|###3      | 14.1M/41.5M [03:11<05:09, 93.0kB/s]
     34%|###3      | 14.1M/41.5M [03:11<04:29, 107kB/s] 
     34%|###3      | 14.1M/41.5M [03:11<04:39, 103kB/s]
     34%|###4      | 14.1M/41.5M [03:11<04:48, 99.7kB/s]
     34%|###4      | 14.1M/41.5M [03:11<04:17, 111kB/s] 
     34%|###4      | 14.1M/41.5M [03:12<04:30, 106kB/s]
     34%|###4      | 14.2M/41.5M [03:12<05:00, 95.3kB/s]
     34%|###4      | 14.2M/41.5M [03:12<04:43, 101kB/s] 
     34%|###4      | 14.2M/41.5M [03:12<04:50, 98.6kB/s]
 
     34%|###4      | 14.2M/41.5M [03:12<06:25, 74.2kB/s]
     34%|###4      | 14.2M/41.5M [03:13<07:54, 60.3kB/s]
     34%|###4      | 14.3M/41.5M [03:13<05:38, 84.3kB/s]
     34%|###4      | 14.3M/41.5M [03:13<05:48, 81.9kB/s]
     34%|###4      | 14.3M/41.5M [03:14<06:36, 72.0kB/s]
     34%|###4      | 14.3M/41.5M [03:14<08:54, 53.3kB/s]
     35%|###4      | 14.3M/41.5M [03:14<06:10, 76.8kB/s]
     35%|###4      | 14.4M/41.5M [03:15<08:36, 55.1kB/s]
     35%|###4      | 14.4M/41.5M [03:15<06:36, 71.7kB/s]
     35%|###4      | 14.4M/41.5M [03:15<06:53, 68.8kB/s]
     35%|###4      | 14.4M/41.5M [03:16<08:39, 54.7kB/s]
     35%|###4      | 14.4M/41.5M [03:16<06:30, 72.6kB/s]
     35%|###4      | 14.5M/41.5M [03:16<07:26, 63.5kB/s]
     35%|###4      | 14.5M/41.5M [03:17<06:48, 69.3kB/s]
     35%|###4      | 14.5M/41.5M [03:17<06:20, 74.5kB/s]
     35%|###4      | 14.5M/41.5M [03:17<05:58, 78.9kB/s]
     35%|###4      | 14.5M/41.5M [03:17<07:10, 65.7kB/s]
     35%|###5      | 14.5M/
 41.5M [03:17<06:33, 71.8kB/s]
     35%|###5      | 14.5M/41.5M [03:18<06:07, 76.9kB/s]
     35%|###5      | 14.6M/41.5M [03:18<05:48, 81.0kB/s]
     35%|###5      | 14.6M/41.5M [03:18<05:35, 84.1kB/s]
     35%|###5      | 14.6M/41.5M [03:18<05:25, 86.5kB/s]
     35%|###5      | 14.6M/41.5M [03:18<06:50, 68.7kB/s]
     35%|###5      | 14.6M/41.5M [03:19<04:51, 96.7kB/s]
     35%|###5      | 14.7M/41.5M [03:19<04:54, 95.7kB/s]
     35%|###5      | 14.7M/41.5M [03:19<04:56, 94.9kB/s]
     35%|###5      | 14.7M/41.5M [03:19<06:20, 73.8kB/s]
     35%|###5      | 14.7M/41.5M [03:20<06:28, 72.2kB/s]
     36%|###5      | 14.7M/41.5M [03:20<05:24, 86.5kB/s]
     36%|###5      | 14.8M/41.5M [03:20<05:18, 88.0kB/s]
     36%|###5      | 14.8M/41.5M [03:20<05:14, 89.2kB/s]
     36%|###5      | 14.8M/41.5M [03:21<06:32, 71.3kB/s]
     36%|###5      | 14.8M/41.5M [03:21<07:07, 65.5kB/s]
     36%|###5      | 14.8M/41.5M [03:21<08:03, 57.9kB/s]
     36%|###5      | 14.8M/41.5M [03:22<08:25, 55.3kB/s
 ]
     36%|###5      | 14.9M/41.5M [03:22<07:31, 61.9kB/s]
     36%|###5      | 14.9M/41.5M [03:22<07:51, 59.1kB/s]
     36%|###5      | 14.9M/41.5M [03:22<08:13, 56.6kB/s]
     36%|###5      | 14.9M/41.5M [03:23<07:11, 64.6kB/s]
     36%|###5      | 14.9M/41.5M [03:23<07:42, 60.2kB/s]
     36%|###5      | 14.9M/41.5M [03:23<08:11, 56.7kB/s]
     36%|###5      | 14.9M/41.5M [03:23<06:59, 66.3kB/s]
     36%|###5      | 14.9M/41.5M [03:23<07:37, 60.8kB/s]
     36%|###6      | 14.9M/41.5M [03:23<08:10, 56.7kB/s]
     36%|###6      | 15.0M/41.5M [03:24<06:54, 67.1kB/s]
     36%|###6      | 15.0M/41.5M [03:24<07:35, 61.1kB/s]
     36%|###6      | 15.0M/41.5M [03:24<06:35, 70.4kB/s]
     36%|###6      | 15.0M/41.5M [03:24<07:18, 63.3kB/s]
     36%|###6      | 15.0M/41.5M [03:24<06:25, 72.1kB/s]
     36%|###6      | 15.0M/41.5M [03:25<05:54, 78.2kB/s]
     36%|###6      | 15.0M/41.5M [03:25<05:36, 82.6kB/s]
     36%|###6      | 15.0M/41.5M [03:25<07:01, 65.8kB/s]
     36%|###6      | 15.1M
 /41.5M [03:25<09:27, 48.9kB/s]
     36%|###6      | 15.1M/41.5M [03:26<05:50, 79.0kB/s]
     36%|###6      | 15.1M/41.5M [03:26<05:36, 82.3kB/s]
     36%|###6      | 15.1M/41.5M [03:26<08:11, 56.2kB/s]
     37%|###6      | 15.1M/41.5M [03:26<05:39, 81.3kB/s]
     37%|###6      | 15.2M/41.5M [03:27<05:29, 83.8kB/s]
     37%|###6      | 15.2M/41.5M [03:27<08:11, 56.1kB/s]
     37%|###6      | 15.2M/41.5M [03:27<06:10, 74.3kB/s]
     37%|###6      | 15.2M/41.5M [03:28<07:06, 64.5kB/s]
     37%|###6      | 15.2M/41.5M [03:28<06:32, 70.2kB/s]
     37%|###6      | 15.2M/41.5M [03:28<07:27, 61.5kB/s]
     37%|###6      | 15.3M/41.5M [03:28<06:44, 68.0kB/s]
     37%|###6      | 15.3M/41.5M [03:29<06:13, 73.6kB/s]
     37%|###6      | 15.3M/41.5M [03:29<05:50, 78.3kB/s]
     37%|###6      | 15.3M/41.5M [03:29<05:34, 82.1kB/s]
     37%|###6      | 15.3M/41.5M [03:29<05:22, 85.0kB/s]
     37%|###6      | 15.3M/41.5M [03:30<08:10, 55.9kB/s]
     37%|###7      | 15.4M/41.5M [03:30<07:33, 60.4kB/
 s]
     37%|###7      | 15.4M/41.5M [03:30<06:01, 75.7kB/s]
     37%|###7      | 15.4M/41.5M [03:31<11:02, 41.3kB/s]
     37%|###7      | 15.4M/41.5M [03:31<08:30, 53.5kB/s]
     37%|###7      | 15.4M/41.5M [03:32<10:03, 45.2kB/s]
     37%|###7      | 15.5M/41.5M [03:32<10:01, 45.4kB/s]
     37%|###7      | 15.5M/41.5M [03:32<09:58, 45.6kB/s]
     37%|###7      | 15.5M/41.5M [03:32<09:56, 45.7kB/s]
     37%|###7      | 15.5M/41.5M [03:33<09:54, 45.9kB/s]
     37%|###7      | 15.5M/41.5M [03:33<09:52, 46.0kB/s]
     37%|###7      | 15.5M/41.5M [03:33<09:51, 46.1kB/s]
     37%|###7      | 15.5M/41.5M [03:33<12:28, 36.4kB/s]
     37%|###7      | 15.5M/41.5M [03:34<11:18, 40.2kB/s]
     37%|###7      | 15.5M/41.5M [03:34<10:56, 41.5kB/s]
     37%|###7      | 15.5M/41.5M [03:34<10:38, 42.6kB/s]
     37%|###7      | 15.5M/41.5M [03:34<08:13, 55.1kB/s]
     37%|###7      | 15.6M/41.5M [03:34<10:50, 41.8kB/s]
     38%|###7      | 15.6M/41.5M [03:35<08:54, 50.8kB/s]
     38%|###7      | 15.6
 M/41.5M [03:35<09:05, 49.8kB/s]
     38%|###7      | 15.6M/41.5M [03:35<08:43, 51.9kB/s]
     38%|###7      | 15.6M/41.5M [03:35<08:58, 50.4kB/s]
     38%|###7      | 15.6M/41.5M [03:35<09:11, 49.3kB/s]
     38%|###7      | 15.6M/41.5M [03:36<09:20, 48.4kB/s]
     38%|###7      | 15.6M/41.5M [03:36<07:22, 61.3kB/s]
     38%|###7      | 15.6M/41.5M [03:36<07:56, 56.9kB/s]
     38%|###7      | 15.6M/41.5M [03:36<06:41, 67.5kB/s]
     38%|###7      | 15.7M/41.5M [03:36<07:22, 61.3kB/s]
     38%|###7      | 15.7M/41.5M [03:36<06:23, 70.7kB/s]
     38%|###7      | 15.7M/41.5M [03:37<07:06, 63.4kB/s]
     38%|###7      | 15.7M/41.5M [03:37<08:06, 55.6kB/s]
     38%|###7      | 15.7M/41.5M [03:37<06:15, 72.0kB/s]
     38%|###7      | 15.7M/41.5M [03:37<06:28, 69.6kB/s]
     38%|###7      | 15.7M/41.5M [03:38<07:33, 59.6kB/s]
     38%|###7      | 15.8M/41.5M [03:38<06:22, 70.6kB/s]
     38%|###8      | 15.8M/41.5M [03:38<06:32, 68.7kB/s]
     38%|###8      | 15.8M/41.5M [03:38<05:59, 75.0kB
 /s]
     38%|###8      | 15.8M/41.5M [03:39<08:29, 52.9kB/s]
     38%|###8      | 15.8M/41.5M [03:39<06:08, 73.0kB/s]
     38%|###8      | 15.8M/41.5M [03:39<06:29, 69.0kB/s]
     38%|###8      | 15.8M/41.5M [03:39<06:34, 68.2kB/s]
     38%|###8      | 15.9M/41.5M [03:39<05:59, 74.8kB/s]
     38%|###8      | 15.9M/41.5M [03:40<08:32, 52.4kB/s]
     38%|###8      | 15.9M/41.5M [03:40<06:07, 72.9kB/s]
     38%|###8      | 15.9M/41.5M [03:40<05:45, 77.7kB/s]
     38%|###8      | 15.9M/41.5M [03:40<06:34, 68.0kB/s]
     38%|###8      | 15.9M/41.5M [03:41<06:02, 73.9kB/s]
     38%|###8      | 15.9M/41.5M [03:41<08:22, 53.3kB/s]
     38%|###8      | 16.0M/41.5M [03:41<06:08, 72.6kB/s]
     39%|###8      | 16.0M/41.5M [03:42<08:32, 52.2kB/s]
     39%|###8      | 16.0M/41.5M [03:42<07:12, 61.8kB/s]
     39%|###8      | 16.0M/41.5M [03:42<07:54, 56.3kB/s]
     39%|###8      | 16.0M/41.5M [03:42<08:12, 54.3kB/s]
     39%|###8      | 16.0M/41.5M [03:42<07:04, 62.9kB/s]
     39%|###8      | 16.
 0M/41.5M [03:43<09:23, 47.4kB/s]
     39%|###8      | 16.1M/41.5M [03:43<07:45, 57.3kB/s]
     39%|###8      | 16.1M/41.5M [03:43<08:07, 54.7kB/s]
     39%|###8      | 16.1M/41.5M [03:43<08:39, 51.3kB/s]
     39%|###8      | 16.1M/41.5M [03:44<07:19, 60.6kB/s]
     39%|###8      | 16.1M/41.5M [03:44<07:45, 57.2kB/s]
     39%|###8      | 16.1M/41.5M [03:44<08:09, 54.4kB/s]
     39%|###8      | 16.1M/41.5M [03:44<06:50, 64.7kB/s]
     39%|###8      | 16.1M/41.5M [03:45<15:59, 27.7kB/s]
     39%|###9      | 16.2M/41.5M [03:45<06:29, 68.0kB/s]
     39%|###9      | 16.2M/41.5M [03:45<06:13, 70.9kB/s]
     39%|###9      | 16.2M/41.5M [03:46<08:06, 54.5kB/s]
     39%|###9      | 16.2M/41.5M [03:46<06:22, 69.3kB/s]
     39%|###9      | 16.3M/41.5M [03:47<07:09, 61.6kB/s]
     39%|###9      | 16.3M/41.5M [03:47<07:46, 56.7kB/s]
     39%|###9      | 16.3M/41.5M [03:47<07:15, 60.6kB/s]
     39%|###9      | 16.3M/41.5M [03:47<07:37, 57.7kB/s]
     39%|###9      | 16.3M/41.5M [03:47<07:31, 58.5k
 B/s]
     39%|###9      | 16.3M/41.5M [03:48<07:56, 55.4kB/s]
     39%|###9      | 16.3M/41.5M [03:48<08:50, 49.7kB/s]
     39%|###9      | 16.3M/41.5M [03:48<08:25, 52.2kB/s]
     39%|###9      | 16.3M/41.5M [03:48<07:22, 59.6kB/s]
     39%|###9      | 16.4M/41.5M [03:48<07:18, 60.1kB/s]
     39%|###9      | 16.4M/41.5M [03:48<07:50, 56.0kB/s]
     39%|###9      | 16.4M/41.5M [03:49<08:15, 53.1kB/s]
     39%|###9      | 16.4M/41.5M [03:49<06:45, 65.0kB/s]
     40%|###9      | 16.4M/41.5M [03:49<07:23, 59.4kB/s]
     40%|###9      | 16.4M/41.5M [03:49<06:44, 65.0kB/s]
     40%|###9      | 16.4M/41.5M [03:49<06:52, 63.8kB/s]
     40%|###9      | 16.4M/41.5M [03:50<06:01, 72.6kB/s]
     40%|###9      | 16.4M/41.5M [03:50<05:56, 73.6kB/s]
     40%|###9      | 16.5M/41.5M [03:50<05:31, 79.2kB/s]
     40%|###9      | 16.5M/41.5M [03:50<05:51, 74.5kB/s]
     40%|###9      | 16.5M/41.5M [03:50<07:07, 61.3kB/s]
     40%|###9      | 16.5M/41.5M [03:51<04:59, 87.6kB/s]
     40%|###9      | 16
 .5M/41.5M [03:51<04:54, 88.9kB/s]
     40%|###9      | 16.5M/41.5M [03:51<05:12, 83.8kB/s]
     40%|###9      | 16.6M/41.5M [03:51<06:01, 72.4kB/s]
     40%|###9      | 16.6M/41.5M [03:51<06:34, 66.2kB/s]
     40%|###9      | 16.6M/41.5M [03:52<05:58, 72.9kB/s]
     40%|####      | 16.6M/41.5M [03:52<05:33, 78.2kB/s]
     40%|####      | 16.6M/41.5M [03:52<05:17, 82.2kB/s]
     40%|####      | 16.6M/41.5M [03:52<05:05, 85.2kB/s]
     40%|####      | 16.6M/41.5M [03:53<06:24, 67.8kB/s]
     40%|####      | 16.7M/41.5M [03:53<05:52, 73.9kB/s]
     40%|####      | 16.7M/41.5M [03:53<05:30, 78.8kB/s]
     40%|####      | 16.7M/41.5M [03:53<05:15, 82.5kB/s]
     40%|####      | 16.7M/41.5M [03:53<06:28, 66.8kB/s]
     40%|####      | 16.7M/41.5M [03:54<05:09, 83.8kB/s]
     40%|####      | 16.8M/41.5M [03:54<05:04, 85.1kB/s]
     40%|####      | 16.8M/41.5M [03:54<05:13, 82.6kB/s]
     40%|####      | 16.8M/41.5M [03:54<05:03, 85.3kB/s]
     40%|####      | 16.8M/41.5M [03:54<04:39, 92.6
 kB/s]
     41%|####      | 16.8M/41.5M [03:55<04:56, 87.3kB/s]
     41%|####      | 16.8M/41.5M [03:55<04:51, 88.9kB/s]
     41%|####      | 16.8M/41.5M [03:55<04:47, 90.0kB/s]
     41%|####      | 16.9M/41.5M [03:55<04:44, 90.8kB/s]
     41%|####      | 16.9M/41.5M [03:55<05:44, 74.9kB/s]
     41%|####      | 16.9M/41.5M [03:56<04:45, 90.3kB/s]
     41%|####      | 16.9M/41.5M [03:56<04:43, 91.0kB/s]
     41%|####      | 16.9M/41.5M [03:56<05:17, 81.1kB/s]
     41%|####      | 16.9M/41.5M [03:56<07:24, 57.9kB/s]
     41%|####      | 17.0M/41.5M [03:57<05:31, 77.6kB/s]
     41%|####      | 17.0M/41.5M [03:57<06:30, 65.8kB/s]
     41%|####      | 17.0M/41.5M [03:57<05:58, 71.5kB/s]
     41%|####1     | 17.0M/41.5M [03:57<05:35, 76.5kB/s]
     41%|####1     | 17.0M/41.5M [03:58<05:18, 80.5kB/s]
     41%|####1     | 17.0M/41.5M [03:58<05:06, 83.7kB/s]
     41%|####1     | 17.1M/41.5M [03:58<04:57, 86.2kB/s]
     41%|####1     | 17.1M/41.5M [03:58<06:12, 68.8kB/s]
     41%|####1     | 1
 7.1M/41.5M [03:58<05:43, 74.5kB/s]
     41%|####1     | 17.1M/41.5M [03:59<05:23, 79.1kB/s]
     41%|####1     | 17.1M/41.5M [03:59<05:08, 82.7kB/s]
     41%|####1     | 17.1M/41.5M [03:59<04:58, 85.5kB/s]
     41%|####1     | 17.2M/41.5M [03:59<04:13, 101kB/s] 
     41%|####1     | 17.2M/41.5M [03:59<04:37, 92.0kB/s]
     41%|####1     | 17.2M/41.5M [03:59<04:18, 98.6kB/s]
     41%|####1     | 17.2M/41.5M [04:00<06:50, 62.0kB/s]
     42%|####1     | 17.2M/41.5M [04:00<05:31, 76.7kB/s]
     42%|####1     | 17.2M/41.5M [04:00<05:16, 80.3kB/s]
     42%|####1     | 17.3M/41.5M [04:01<06:19, 66.9kB/s]
     42%|####1     | 17.3M/41.5M [04:01<05:49, 72.6kB/s]
     42%|####1     | 17.3M/41.5M [04:01<05:27, 77.4kB/s]
     42%|####1     | 17.3M/41.5M [04:01<06:31, 64.8kB/s]
     42%|####1     | 17.3M/41.5M [04:02<07:17, 57.9kB/s]
     42%|####1     | 17.3M/41.5M [04:02<07:59, 52.8kB/s]
     42%|####1     | 17.4M/41.5M [04:02<07:58, 52.9kB/s]
     42%|####1     | 17.4M/41.5M [04:02<08:11, 51.
 5kB/s]
     42%|####1     | 17.4M/41.5M [04:03<10:19, 40.8kB/s]
     42%|####1     | 17.4M/41.5M [04:03<14:17, 29.5kB/s]
     42%|####1     | 17.4M/41.5M [04:04<14:44, 28.6kB/s]
     42%|####1     | 17.4M/41.5M [04:04<12:01, 35.0kB/s]
     42%|####1     | 17.4M/41.5M [04:04<11:24, 36.9kB/s]
     42%|####1     | 17.4M/41.5M [04:05<10:52, 38.7kB/s]
     42%|####2     | 17.4M/41.5M [04:05<10:25, 40.3kB/s]
     42%|####2     | 17.4M/41.5M [04:05<10:04, 41.7kB/s]
     42%|####2     | 17.4M/41.5M [04:05<12:14, 34.3kB/s]
     42%|####2     | 17.5M/41.5M [04:05<11:21, 37.0kB/s]
     42%|####2     | 17.5M/41.5M [04:06<10:41, 39.3kB/s]
     42%|####2     | 17.5M/41.5M [04:06<10:13, 41.1kB/s]
     42%|####2     | 17.5M/41.5M [04:06<09:52, 42.5kB/s]
     42%|####2     | 17.5M/41.5M [04:06<09:37, 43.6kB/s]
     42%|####2     | 17.5M/41.5M [04:07<12:07, 34.6kB/s]
     42%|####2     | 17.5M/41.5M [04:07<11:12, 37.4kB/s]
     42%|####2     | 17.5M/41.5M [04:07<13:15, 31.6kB/s]
     42%|####2     | 
 17.5M/41.5M [04:07<09:13, 45.4kB/s]
     42%|####2     | 17.5M/41.5M [04:08<09:08, 45.8kB/s]
     42%|####2     | 17.6M/41.5M [04:08<07:27, 56.0kB/s]
     42%|####2     | 17.6M/41.5M [04:08<07:46, 53.7kB/s]
     42%|####2     | 17.6M/41.5M [04:08<08:03, 51.9kB/s]
     42%|####2     | 17.6M/41.5M [04:08<08:17, 50.4kB/s]
     42%|####2     | 17.6M/41.5M [04:08<08:28, 49.3kB/s]
     42%|####2     | 17.6M/41.5M [04:09<06:46, 61.6kB/s]
     42%|####2     | 17.6M/41.5M [04:09<07:17, 57.2kB/s]
     42%|####2     | 17.6M/41.5M [04:09<09:57, 41.9kB/s]
     42%|####2     | 17.6M/41.5M [04:09<09:42, 43.0kB/s]
     42%|####2     | 17.6M/41.5M [04:10<09:30, 43.8kB/s]
     43%|####2     | 17.6M/41.5M [04:10<09:21, 44.5kB/s]
     43%|####2     | 17.6M/41.5M [04:10<11:49, 35.2kB/s]
     43%|####2     | 17.7M/41.5M [04:10<08:31, 48.9kB/s]
     43%|####2     | 17.7M/41.5M [04:11<10:48, 38.5kB/s]
     43%|####2     | 17.7M/41.5M [04:11<08:13, 50.6kB/s]
     43%|####2     | 17.7M/41.5M [04:11<10:25, 39
 .9kB/s]
     43%|####2     | 17.7M/41.5M [04:11<09:50, 42.2kB/s]
     43%|####2     | 17.7M/41.5M [04:12<09:38, 43.1kB/s]
     43%|####2     | 17.7M/41.5M [04:12<11:36, 35.8kB/s]
     43%|####2     | 17.7M/41.5M [04:12<10:56, 38.0kB/s]
     43%|####2     | 17.7M/41.5M [04:12<10:24, 39.9kB/s]
     43%|####2     | 17.8M/41.5M [04:13<09:59, 41.5kB/s]
     43%|####2     | 17.8M/41.5M [04:13<09:41, 42.8kB/s]
     43%|####2     | 17.8M/41.5M [04:13<09:28, 43.7kB/s]
     43%|####2     | 17.8M/41.5M [04:13<09:19, 44.5kB/s]
     43%|####2     | 17.8M/41.5M [04:13<09:12, 45.0kB/s]
     43%|####2     | 17.8M/41.5M [04:14<11:45, 35.2kB/s]
     43%|####2     | 17.8M/41.5M [04:14<10:54, 37.9kB/s]
     43%|####2     | 17.8M/41.5M [04:14<08:27, 49.0kB/s]
     43%|####2     | 17.8M/41.5M [04:14<08:33, 48.3kB/s]
     43%|####2     | 17.8M/41.5M [04:15<10:58, 37.7kB/s]
     43%|####2     | 17.8M/41.5M [04:15<09:49, 42.1kB/s]
     43%|####3     | 17.8M/41.5M [04:15<09:33, 43.2kB/s]
     43%|####3     |
  17.9M/41.5M [04:15<10:49, 38.2kB/s]
     43%|####3     | 17.9M/41.5M [04:15<10:15, 40.2kB/s]
     43%|####3     | 17.9M/41.5M [04:15<09:51, 41.8kB/s]
     43%|####3     | 17.9M/41.5M [04:16<09:34, 43.1kB/s]
     43%|####3     | 17.9M/41.5M [04:16<09:22, 44.0kB/s]
     43%|####3     | 17.9M/41.5M [04:16<09:13, 44.7kB/s]
     43%|####3     | 17.9M/41.5M [04:16<07:54, 52.2kB/s]
     43%|####3     | 17.9M/41.5M [04:16<07:12, 57.2kB/s]
     43%|####3     | 17.9M/41.5M [04:17<07:38, 54.0kB/s]
     43%|####3     | 17.9M/41.5M [04:17<06:16, 65.7kB/s]
     43%|####3     | 17.9M/41.5M [04:17<06:52, 59.9kB/s]
     43%|####3     | 18.0M/41.5M [04:17<05:53, 69.8kB/s]
     43%|####3     | 18.0M/41.5M [04:17<06:58, 58.9kB/s]
     43%|####3     | 18.0M/41.5M [04:18<05:12, 78.9kB/s]
     43%|####3     | 18.0M/41.5M [04:18<04:58, 82.5kB/s]
     43%|####3     | 18.0M/41.5M [04:18<04:48, 85.3kB/s]
     43%|####3     | 18.0M/41.5M [04:18<06:00, 68.3kB/s]
     44%|####3     | 18.1M/41.5M [04:18<05:31, 7
 4.0kB/s]
     44%|####3     | 18.1M/41.5M [04:19<05:11, 78.7kB/s]
     44%|####3     | 18.1M/41.5M [04:19<04:57, 82.4kB/s]
     44%|####3     | 18.1M/41.5M [04:19<04:47, 85.3kB/s]
     44%|####3     | 18.1M/41.5M [04:19<04:40, 87.4kB/s]
     44%|####3     | 18.1M/41.5M [04:20<05:54, 69.1kB/s]
     44%|####3     | 18.2M/41.5M [04:20<05:00, 81.4kB/s]
     44%|####3     | 18.2M/41.5M [04:20<05:05, 80.1kB/s]
     44%|####3     | 18.2M/41.5M [04:20<04:53, 83.2kB/s]
     44%|####3     | 18.2M/41.5M [04:20<04:44, 85.8kB/s]
     44%|####3     | 18.2M/41.5M [04:20<04:38, 87.7kB/s]
     44%|####3     | 18.2M/41.5M [04:21<04:33, 89.1kB/s]
     44%|####4     | 18.3M/41.5M [04:21<04:30, 90.2kB/s]
     44%|####4     | 18.3M/41.5M [04:21<04:27, 90.9kB/s]
     44%|####4     | 18.3M/41.5M [04:21<04:25, 91.5kB/s]
     44%|####4     | 18.3M/41.5M [04:21<04:24, 91.8kB/s]
     44%|####4     | 18.3M/41.5M [04:22<04:23, 92.1kB/s]
     44%|####4     | 18.3M/41.5M [04:22<04:06, 98.5kB/s]
     44%|####4     
 | 18.4M/41.5M [04:22<04:10, 96.7kB/s]
     44%|####4     | 18.4M/41.5M [04:22<04:13, 95.5kB/s]
     44%|####4     | 18.4M/41.5M [04:22<03:42, 109kB/s] 
     44%|####4     | 18.4M/41.5M [04:22<03:38, 111kB/s]
     44%|####4     | 18.4M/41.5M [04:23<03:22, 119kB/s]
     44%|####4     | 18.5M/41.5M [04:23<03:23, 119kB/s]
     45%|####4     | 18.5M/41.5M [04:23<03:13, 125kB/s]
     45%|####4     | 18.5M/41.5M [04:23<03:18, 122kB/s]
     45%|####4     | 18.5M/41.5M [04:23<02:58, 135kB/s]
     45%|####4     | 18.5M/41.5M [04:23<02:56, 136kB/s]
     45%|####4     | 18.6M/41.5M [04:24<02:55, 137kB/s]
     45%|####4     | 18.6M/41.5M [04:24<02:48, 143kB/s]
     45%|####4     | 18.6M/41.5M [04:24<02:38, 151kB/s]
     45%|####4     | 18.6M/41.5M [04:24<03:32, 113kB/s]
     45%|####5     | 18.7M/41.5M [04:25<03:06, 128kB/s]
     45%|####5     | 18.7M/41.5M [04:25<02:52, 139kB/s]
     45%|####5     | 18.7M/41.5M [04:25<03:09, 126kB/s]
     45%|####5     | 18.8M/41.5M [04:25<03:03, 130kB/s]
     
 45%|####5     | 18.8M/41.5M [04:25<03:19, 119kB/s]
     45%|####5     | 18.8M/41.5M [04:26<04:35, 86.5kB/s]
     45%|####5     | 18.8M/41.5M [04:26<05:35, 70.9kB/s]
     45%|####5     | 18.8M/41.5M [04:26<04:50, 81.7kB/s]
     45%|####5     | 18.8M/41.5M [04:27<07:56, 49.9kB/s]
     45%|####5     | 18.9M/41.5M [04:27<09:22, 42.2kB/s]
     45%|####5     | 18.9M/41.5M [04:28<07:00, 56.4kB/s]
     46%|####5     | 18.9M/41.5M [04:28<07:23, 53.4kB/s]
     46%|####5     | 18.9M/41.5M [04:28<06:32, 60.3kB/s]
     46%|####5     | 18.9M/41.5M [04:28<06:51, 57.5kB/s]
     46%|####5     | 18.9M/41.5M [04:28<07:09, 55.0kB/s]
     46%|####5     | 18.9M/41.5M [04:29<06:08, 64.1kB/s]
     46%|####5     | 19.0M/41.5M [04:29<08:19, 47.3kB/s]
     46%|####5     | 19.0M/41.5M [04:29<07:01, 56.1kB/s]
     46%|####5     | 19.0M/41.5M [04:30<07:17, 54.0kB/s]
     46%|####5     | 19.0M/41.5M [04:30<06:13, 63.1kB/s]
     46%|####5     | 19.0M/41.5M [04:30<08:41, 45.2kB/s]
     46%|####5     | 19.0M/41.5M [
 04:30<08:38, 45.4kB/s]
     46%|####5     | 19.0M/41.5M [04:31<10:38, 36.9kB/s]
     46%|####5     | 19.0M/41.5M [04:31<12:14, 32.1kB/s]
     46%|####5     | 19.0M/41.5M [04:31<11:13, 35.0kB/s]
     46%|####5     | 19.0M/41.5M [04:32<15:46, 24.9kB/s]
     46%|####5     | 19.0M/41.5M [04:32<18:29, 21.2kB/s]
     46%|####5     | 19.1M/41.5M [04:33<15:52, 24.7kB/s]
     46%|####5     | 19.1M/41.5M [04:33<12:32, 31.2kB/s]
     46%|####6     | 19.1M/41.5M [04:34<14:13, 27.5kB/s]
     46%|####6     | 19.1M/41.5M [04:34<14:02, 27.9kB/s]
     46%|####6     | 19.1M/41.5M [04:34<14:45, 26.5kB/s]
     46%|####6     | 19.1M/41.5M [04:34<13:58, 28.0kB/s]
     46%|####6     | 19.1M/41.5M [04:35<14:23, 27.2kB/s]
     46%|####6     | 19.1M/41.5M [04:35<15:28, 25.3kB/s]
     46%|####6     | 19.1M/41.5M [04:35<13:03, 29.9kB/s]
     46%|####6     | 19.1M/41.5M [04:35<12:05, 32.3kB/s]
     46%|####6     | 19.1M/41.5M [04:36<13:28, 29.0kB/s]
     46%|####6     | 19.2M/41.5M [04:36<11:58, 32.6kB/s]
     
 46%|####6     | 19.2M/41.5M [04:36<13:24, 29.1kB/s]
     46%|####6     | 19.2M/41.5M [04:37<11:55, 32.7kB/s]
     46%|####6     | 19.2M/41.5M [04:37<13:22, 29.1kB/s]
     46%|####6     | 19.2M/41.5M [04:37<10:16, 37.9kB/s]
     46%|####6     | 19.2M/41.5M [04:37<10:40, 36.5kB/s]
     46%|####6     | 19.2M/41.5M [04:38<10:04, 38.6kB/s]
     46%|####6     | 19.2M/41.5M [04:38<09:37, 40.5kB/s]
     46%|####6     | 19.2M/41.5M [04:38<09:16, 42.0kB/s]
     46%|####6     | 19.2M/41.5M [04:38<11:24, 34.1kB/s]
     46%|####6     | 19.2M/41.5M [04:39<10:02, 38.7kB/s]
     46%|####6     | 19.3M/41.5M [04:39<09:38, 40.3kB/s]
     46%|####6     | 19.3M/41.5M [04:39<09:18, 41.7kB/s]
     46%|####6     | 19.3M/41.5M [04:39<11:18, 34.3kB/s]
     46%|####6     | 19.3M/41.5M [04:40<10:29, 37.0kB/s]
     46%|####6     | 19.3M/41.5M [04:40<09:52, 39.3kB/s]
     47%|####6     | 19.3M/41.5M [04:40<09:26, 41.1kB/s]
     47%|####6     | 19.3M/41.5M [04:40<09:07, 42.5kB/s]
     47%|####6     | 19.3M/41.5M 
 [04:40<08:46, 44.2kB/s]
     47%|####6     | 19.3M/41.5M [04:41<08:59, 43.1kB/s]
     47%|####6     | 19.4M/41.5M [04:41<08:24, 46.0kB/s]
     47%|####6     | 19.4M/41.5M [04:41<08:46, 44.1kB/s]
     47%|####6     | 19.4M/41.5M [04:42<08:40, 44.6kB/s]
     47%|####6     | 19.4M/41.5M [04:42<08:35, 45.0kB/s]
     47%|####6     | 19.4M/41.5M [04:42<08:31, 45.3kB/s]
     47%|####6     | 19.4M/41.5M [04:42<08:27, 45.6kB/s]
     47%|####6     | 19.4M/41.5M [04:42<08:25, 45.8kB/s]
     47%|####6     | 19.4M/41.5M [04:42<08:23, 46.0kB/s]
     47%|####6     | 19.4M/41.5M [04:43<07:49, 49.3kB/s]
     47%|####6     | 19.4M/41.5M [04:43<07:58, 48.4kB/s]
     47%|####6     | 19.4M/41.5M [04:43<10:30, 36.7kB/s]
     47%|####6     | 19.5M/41.5M [04:43<06:32, 58.8kB/s]
     47%|####6     | 19.5M/41.5M [04:43<06:54, 55.7kB/s]
     47%|####6     | 19.5M/41.5M [04:44<05:52, 65.5kB/s]
     47%|####6     | 19.5M/41.5M [04:44<05:16, 73.0kB/s]
     47%|####6     | 19.5M/41.5M [04:44<05:52, 65.5kB/s]
    
  47%|####7     | 19.5M/41.5M [04:44<05:14, 73.3kB/s]
     47%|####7     | 19.5M/41.5M [04:44<04:51, 79.0kB/s]
     47%|####7     | 19.5M/41.5M [04:45<04:37, 83.0kB/s]
     47%|####7     | 19.6M/41.5M [04:45<04:27, 85.9kB/s]
     47%|####7     | 19.6M/41.5M [04:45<04:21, 87.9kB/s]
     47%|####7     | 19.6M/41.5M [04:45<04:16, 89.4kB/s]
     47%|####7     | 19.6M/41.5M [04:45<05:29, 69.6kB/s]
     47%|####7     | 19.6M/41.5M [04:46<05:04, 75.3kB/s]
     47%|####7     | 19.6M/41.5M [04:46<06:01, 63.3kB/s]
     47%|####7     | 19.7M/41.5M [04:46<04:59, 76.3kB/s]
     47%|####7     | 19.7M/41.5M [04:46<05:35, 68.1kB/s]
     47%|####7     | 19.7M/41.5M [04:47<05:27, 69.8kB/s]
     47%|####7     | 19.7M/41.5M [04:47<05:34, 68.2kB/s]
     48%|####7     | 19.7M/41.5M [04:47<05:25, 70.2kB/s]
     48%|####7     | 19.7M/41.5M [04:47<05:33, 68.3kB/s]
     48%|####7     | 19.7M/41.5M [04:47<05:03, 75.2kB/s]
     48%|####7     | 19.8M/41.5M [04:48<05:03, 75.0kB/s]
     48%|####7     | 19.8M/41.5M
  [04:48<06:04, 62.6kB/s]
     48%|####7     | 19.8M/41.5M [04:48<05:48, 65.2kB/s]
     48%|####7     | 19.8M/41.5M [04:48<05:03, 74.9kB/s]
     48%|####7     | 19.8M/41.5M [04:49<05:56, 63.7kB/s]
     48%|####7     | 19.8M/41.5M [04:49<06:18, 59.9kB/s]
     48%|####7     | 19.8M/41.5M [04:49<06:40, 56.7kB/s]
     48%|####7     | 19.9M/41.5M [04:49<08:46, 43.1kB/s]
     48%|####7     | 19.9M/41.5M [04:50<06:23, 59.2kB/s]
     48%|####7     | 19.9M/41.5M [04:50<06:45, 55.8kB/s]
     48%|####7     | 19.9M/41.5M [04:50<06:18, 59.8kB/s]
     48%|####7     | 19.9M/41.5M [04:50<06:41, 56.3kB/s]
     48%|####7     | 19.9M/41.5M [04:51<08:56, 42.2kB/s]
     48%|####8     | 19.9M/41.5M [04:51<09:37, 39.1kB/s]
     48%|####8     | 19.9M/41.5M [04:52<10:33, 35.7kB/s]
     48%|####8     | 20.0M/41.5M [04:52<11:06, 33.9kB/s]
     48%|####8     | 20.0M/41.5M [04:53<13:41, 27.5kB/s]
     48%|####8     | 20.0M/41.5M [04:53<16:01, 23.5kB/s]
     48%|####8     | 20.0M/41.5M [04:53<16:03, 23.4kB/s]
   
   48%|####8     | 20.0M/41.5M [04:54<16:05, 23.4kB/s]
     48%|####8     | 20.0M/41.5M [04:54<13:57, 26.9kB/s]
     48%|####8     | 20.0M/41.5M [04:54<12:20, 30.4kB/s]
     48%|####8     | 20.0M/41.5M [04:55<13:25, 28.0kB/s]
     48%|####8     | 20.0M/41.5M [04:55<11:53, 31.6kB/s]
     48%|####8     | 20.0M/41.5M [04:55<10:46, 34.8kB/s]
     48%|####8     | 20.0M/41.5M [04:55<07:42, 48.6kB/s]
     48%|####8     | 20.0M/41.5M [04:55<07:48, 48.0kB/s]
     48%|####8     | 20.1M/41.5M [04:55<07:52, 47.6kB/s]
     48%|####8     | 20.1M/41.5M [04:56<06:14, 60.0kB/s]
     48%|####8     | 20.1M/41.5M [04:56<06:39, 56.2kB/s]
     48%|####8     | 20.1M/41.5M [04:56<07:00, 53.4kB/s]
     48%|####8     | 20.1M/41.5M [04:56<07:26, 50.2kB/s]
     49%|####8     | 20.1M/41.5M [04:57<06:27, 57.8kB/s]
     49%|####8     | 20.1M/41.5M [04:57<04:56, 75.6kB/s]
     49%|####8     | 20.2M/41.5M [04:57<04:40, 79.7kB/s]
     49%|####8     | 20.2M/41.5M [04:57<05:38, 66.1kB/s]
     49%|####8     | 20.2M/41.5
 M [04:58<06:02, 61.7kB/s]
     49%|####8     | 20.2M/41.5M [04:58<09:31, 39.0kB/s]
     49%|####8     | 20.2M/41.5M [04:59<10:24, 35.7kB/s]
     49%|####8     | 20.2M/41.5M [04:59<13:10, 28.2kB/s]
     49%|####8     | 20.2M/41.5M [04:59<12:00, 30.9kB/s]
     49%|####8     | 20.2M/41.5M [05:00<12:58, 28.6kB/s]
     49%|####8     | 20.2M/41.5M [05:00<11:41, 31.7kB/s]
     49%|####8     | 20.2M/41.5M [05:00<12:51, 28.9kB/s]
     49%|####8     | 20.3M/41.5M [05:00<11:30, 32.3kB/s]
     49%|####8     | 20.3M/41.5M [05:01<10:30, 35.3kB/s]
     49%|####8     | 20.3M/41.5M [05:01<09:46, 37.9kB/s]
     49%|####8     | 20.3M/41.5M [05:01<09:15, 40.1kB/s]
     49%|####8     | 20.3M/41.5M [05:01<08:52, 41.7kB/s]
     49%|####8     | 20.3M/41.5M [05:01<08:36, 43.0kB/s]
     49%|####8     | 20.3M/41.5M [05:02<09:16, 39.9kB/s]
     49%|####8     | 20.3M/41.5M [05:02<08:02, 46.0kB/s]
     49%|####8     | 20.3M/41.5M [05:02<08:01, 46.1kB/s]
     49%|####8     | 20.3M/41.5M [05:02<08:00, 46.2kB/s]
  
    49%|####9     | 20.3M/41.5M [05:02<07:59, 46.2kB/s]
     49%|####9     | 20.4M/41.5M [05:02<06:08, 60.1kB/s]
     49%|####9     | 20.4M/41.5M [05:03<06:35, 56.0kB/s]
     49%|####9     | 20.4M/41.5M [05:03<05:30, 67.0kB/s]
     49%|####9     | 20.4M/41.5M [05:03<06:03, 60.8kB/s]
     49%|####9     | 20.4M/41.5M [05:03<06:31, 56.5kB/s]
     49%|####9     | 20.4M/41.5M [05:03<05:28, 67.3kB/s]
     49%|####9     | 20.4M/41.5M [05:03<06:01, 61.1kB/s]
     49%|####9     | 20.4M/41.5M [05:04<06:30, 56.6kB/s]
     49%|####9     | 20.4M/41.5M [05:04<05:27, 67.5kB/s]
     49%|####9     | 20.4M/41.5M [05:04<06:00, 61.2kB/s]
     49%|####9     | 20.5M/41.5M [05:04<05:12, 70.6kB/s]
     49%|####9     | 20.5M/41.5M [05:04<04:45, 77.3kB/s]
     49%|####9     | 20.5M/41.5M [05:04<04:28, 81.9kB/s]
     49%|####9     | 20.5M/41.5M [05:05<05:08, 71.3kB/s]
     49%|####9     | 20.5M/41.5M [05:05<04:42, 77.7kB/s]
     49%|####9     | 20.5M/41.5M [05:05<04:27, 82.2kB/s]
     50%|####9     | 20.5M/41.
 5M [05:05<04:17, 85.3kB/s]
     50%|####9     | 20.6M/41.5M [05:05<04:10, 87.6kB/s]
     50%|####9     | 20.6M/41.5M [05:06<04:06, 89.1kB/s]
     50%|####9     | 20.6M/41.5M [05:06<04:02, 90.2kB/s]
     50%|####9     | 20.6M/41.5M [05:06<04:00, 91.0kB/s]
     50%|####9     | 20.6M/41.5M [05:06<03:58, 91.6kB/s]
     50%|####9     | 20.6M/41.5M [05:06<03:57, 92.0kB/s]
     50%|####9     | 20.7M/41.5M [05:06<03:25, 106kB/s] 
     50%|####9     | 20.7M/41.5M [05:07<03:33, 102kB/s]
     50%|####9     | 20.7M/41.5M [05:07<03:12, 113kB/s]
     50%|####9     | 20.7M/41.5M [05:07<03:00, 121kB/s]
     50%|#####     | 20.8M/41.5M [05:07<02:52, 126kB/s]
     50%|#####     | 20.8M/41.5M [05:07<04:02, 89.5kB/s]
     50%|#####     | 20.8M/41.5M [05:08<02:53, 125kB/s] 
     50%|#####     | 20.8M/41.5M [05:08<02:48, 128kB/s]
     50%|#####     | 20.8M/41.5M [05:08<03:14, 111kB/s]
     50%|#####     | 20.9M/41.5M [05:08<03:09, 114kB/s]
     50%|#####     | 20.9M/41.5M [05:08<03:35, 100kB/s]
     50%|
 #####     | 20.9M/41.5M [05:09<03:39, 98.3kB/s]
     50%|#####     | 20.9M/41.5M [05:09<04:30, 79.7kB/s]
     50%|#####     | 20.9M/41.5M [05:09<04:34, 78.5kB/s]
     50%|#####     | 20.9M/41.5M [05:09<04:23, 81.7kB/s]
     51%|#####     | 21.0M/41.5M [05:09<04:14, 84.6kB/s]
     51%|#####     | 21.0M/41.5M [05:10<04:07, 86.9kB/s]
     51%|#####     | 21.0M/41.5M [05:10<04:02, 88.5kB/s]
     51%|#####     | 21.0M/41.5M [05:10<05:24, 66.1kB/s]
     51%|#####     | 21.0M/41.5M [05:10<04:18, 83.0kB/s]
     51%|#####     | 21.0M/41.5M [05:11<04:42, 75.8kB/s]
     51%|#####     | 21.1M/41.5M [05:11<04:43, 75.5kB/s]
     51%|#####     | 21.1M/41.5M [05:11<05:13, 68.4kB/s]
     51%|#####     | 21.1M/41.5M [05:11<04:46, 74.7kB/s]
     51%|#####     | 21.1M/41.5M [05:11<05:19, 67.1kB/s]
     51%|#####     | 21.1M/41.5M [05:12<05:08, 69.3kB/s]
     51%|#####     | 21.1M/41.5M [05:12<04:42, 75.7kB/s]
     51%|#####     | 21.1M/41.5M [05:12<04:54, 72.6kB/s]
     51%|#####     | 21.1M/41.5M [05:
 12<04:51, 73.1kB/s]
     51%|#####     | 21.1M/41.5M [05:12<05:02, 70.6kB/s]
     51%|#####1    | 21.2M/41.5M [05:12<04:10, 84.9kB/s]
     51%|#####1    | 21.2M/41.5M [05:13<04:04, 87.2kB/s]
     51%|#####1    | 21.2M/41.5M [05:13<03:57, 89.4kB/s]
     51%|#####1    | 21.2M/41.5M [05:13<03:55, 90.4kB/s]
     51%|#####1    | 21.2M/41.5M [05:13<03:37, 97.8kB/s]
     51%|#####1    | 21.3M/41.5M [05:14<04:25, 79.9kB/s]
     51%|#####1    | 21.3M/41.5M [05:14<03:21, 105kB/s] 
     51%|#####1    | 21.3M/41.5M [05:14<03:27, 102kB/s]
     51%|#####1    | 21.3M/41.5M [05:14<03:31, 100kB/s]
     51%|#####1    | 21.3M/41.5M [05:14<03:35, 98.1kB/s]
     51%|#####1    | 21.4M/41.5M [05:14<03:24, 103kB/s] 
     52%|#####1    | 21.4M/41.5M [05:15<03:30, 100kB/s]
     52%|#####1    | 21.4M/41.5M [05:15<03:08, 112kB/s]
     52%|#####1    | 21.4M/41.5M [05:15<03:18, 106kB/s]
     52%|#####1    | 21.4M/41.5M [05:15<03:01, 116kB/s]
     52%|#####1    | 21.4M/41.5M [05:15<03:12, 109kB/s]
     52%|#####1
     | 21.5M/41.5M [05:15<03:21, 104kB/s]
     52%|#####1    | 21.5M/41.5M [05:16<03:02, 115kB/s]
     52%|#####1    | 21.5M/41.5M [05:16<02:51, 122kB/s]
     52%|#####1    | 21.5M/41.5M [05:16<03:31, 98.8kB/s]
     52%|#####1    | 21.5M/41.5M [05:16<02:57, 118kB/s] 
     52%|#####1    | 21.6M/41.5M [05:17<04:05, 85.3kB/s]
     52%|#####2    | 21.6M/41.5M [05:17<03:46, 92.3kB/s]
     52%|#####2    | 21.6M/41.5M [05:17<05:00, 69.3kB/s]
     52%|#####2    | 21.6M/41.5M [05:17<04:39, 74.6kB/s]
     52%|#####2    | 21.6M/41.5M [05:17<04:42, 73.8kB/s]
     52%|#####2    | 21.6M/41.5M [05:18<05:30, 63.0kB/s]
     52%|#####2    | 21.7M/41.5M [05:18<04:58, 69.6kB/s]
     52%|#####2    | 21.7M/41.5M [05:18<05:24, 64.0kB/s]
     52%|#####2    | 21.7M/41.5M [05:19<07:18, 47.4kB/s]
     52%|#####2    | 21.7M/41.5M [05:19<05:05, 68.0kB/s]
     52%|#####2    | 21.7M/41.5M [05:19<06:51, 50.5kB/s]
     52%|#####2    | 21.7M/41.5M [05:19<06:58, 49.6kB/s]
     52%|#####2    | 21.7M/41.5M [05:20<08:45,
  39.5kB/s]
     52%|#####2    | 21.7M/41.5M [05:20<06:46, 50.9kB/s]
     52%|#####2    | 21.7M/41.5M [05:20<06:55, 49.9kB/s]
     52%|#####2    | 21.8M/41.5M [05:20<07:02, 49.0kB/s]
     52%|#####2    | 21.8M/41.5M [05:20<05:41, 60.5kB/s]
     52%|#####2    | 21.8M/41.5M [05:20<06:04, 56.7kB/s]
     53%|#####2    | 21.8M/41.5M [05:21<05:29, 62.7kB/s]
     53%|#####2    | 21.8M/41.5M [05:21<05:31, 62.2kB/s]
     53%|#####2    | 21.8M/41.5M [05:21<05:58, 57.6kB/s]
     53%|#####2    | 21.8M/41.5M [05:21<05:24, 63.6kB/s]
     53%|#####2    | 21.8M/41.5M [05:21<05:27, 62.9kB/s]
     53%|#####2    | 21.8M/41.5M [05:22<04:46, 71.9kB/s]
     53%|#####2    | 21.9M/41.5M [05:22<05:20, 64.2kB/s]
     53%|#####2    | 21.9M/41.5M [05:22<04:42, 72.8kB/s]
     53%|#####2    | 21.9M/41.5M [05:22<04:20, 78.8kB/s]
     53%|#####2    | 21.9M/41.5M [05:22<04:07, 83.0kB/s]
     53%|#####2    | 21.9M/41.5M [05:22<03:58, 85.9kB/s]
     53%|#####2    | 21.9M/41.5M [05:23<03:53, 88.0kB/s]
     53%|#####2  
   | 22.0M/41.5M [05:23<03:18, 103kB/s] 
     53%|#####2    | 22.0M/41.5M [05:23<03:24, 100kB/s]
     53%|#####3    | 22.0M/41.5M [05:23<03:02, 112kB/s]
     53%|#####3    | 22.0M/41.5M [05:23<03:12, 106kB/s]
     53%|#####3    | 22.0M/41.5M [05:24<04:19, 78.6kB/s]
     53%|#####3    | 22.1M/41.5M [05:24<02:54, 117kB/s] 
     53%|#####3    | 22.1M/41.5M [05:24<03:04, 110kB/s]
     53%|#####3    | 22.1M/41.5M [05:24<03:12, 106kB/s]
     53%|#####3    | 22.1M/41.5M [05:24<02:56, 115kB/s]
     53%|#####3    | 22.1M/41.5M [05:25<03:06, 109kB/s]
     53%|#####3    | 22.2M/41.5M [05:25<03:42, 91.1kB/s]
     53%|#####3    | 22.2M/41.5M [05:25<02:57, 114kB/s] 
     54%|#####3    | 22.2M/41.5M [05:25<03:06, 109kB/s]
     54%|#####3    | 22.2M/41.5M [05:25<03:04, 110kB/s]
     54%|#####3    | 22.2M/41.5M [05:26<03:49, 88.1kB/s]
     54%|#####3    | 22.3M/41.5M [05:26<02:53, 116kB/s] 
     54%|#####3    | 22.3M/41.5M [05:26<03:03, 110kB/s]
     54%|#####3    | 22.3M/41.5M [05:26<02:53, 116kB/s]
 
     54%|#####3    | 22.3M/41.5M [05:26<03:08, 106kB/s]
     54%|#####3    | 22.3M/41.5M [05:27<03:05, 108kB/s]
     54%|#####3    | 22.4M/41.5M [05:27<02:51, 117kB/s]
     54%|#####3    | 22.4M/41.5M [05:27<02:44, 122kB/s]
     54%|#####4    | 22.4M/41.5M [05:27<02:53, 115kB/s]
     54%|#####4    | 22.4M/41.5M [05:27<02:43, 122kB/s]
     54%|#####4    | 22.5M/41.5M [05:28<02:37, 127kB/s]
     54%|#####4    | 22.5M/41.5M [05:28<02:50, 117kB/s]
     54%|#####4    | 22.5M/41.5M [05:28<02:41, 124kB/s]
     54%|#####4    | 22.5M/41.5M [05:28<03:41, 89.8kB/s]
     54%|#####4    | 22.5M/41.5M [05:28<02:48, 118kB/s] 
     54%|#####4    | 22.6M/41.5M [05:29<02:58, 111kB/s]
     54%|#####4    | 22.6M/41.5M [05:29<03:06, 106kB/s]
     54%|#####4    | 22.6M/41.5M [05:29<03:12, 103kB/s]
     54%|#####4    | 22.6M/41.5M [05:29<03:18, 99.9kB/s]
     55%|#####4    | 22.6M/41.5M [05:29<04:20, 76.0kB/s]
     55%|#####4    | 22.6M/41.5M [05:30<04:06, 80.1kB/s]
     55%|#####4    | 22.7M/41.5M [05:30
 <04:14, 77.5kB/s]
     55%|#####4    | 22.7M/41.5M [05:30<04:45, 69.0kB/s]
     55%|#####4    | 22.7M/41.5M [05:30<04:24, 74.6kB/s]
     55%|#####4    | 22.7M/41.5M [05:31<05:11, 63.3kB/s]
     55%|#####4    | 22.7M/41.5M [05:31<04:34, 71.7kB/s]
     55%|#####4    | 22.7M/41.5M [05:31<04:44, 69.2kB/s]
     55%|#####4    | 22.8M/41.5M [05:31<04:23, 74.4kB/s]
     55%|#####4    | 22.8M/41.5M [05:32<04:08, 78.9kB/s]
     55%|#####4    | 22.8M/41.5M [05:32<03:57, 82.4kB/s]
     55%|#####4    | 22.8M/41.5M [05:32<03:50, 85.1kB/s]
     55%|#####5    | 22.8M/41.5M [05:32<04:17, 76.1kB/s]
     55%|#####5    | 22.8M/41.5M [05:32<04:03, 80.4kB/s]
     55%|#####5    | 22.9M/41.5M [05:33<03:53, 83.7kB/s]
     55%|#####5    | 22.9M/41.5M [05:33<03:46, 86.2kB/s]
     55%|#####5    | 22.9M/41.5M [05:33<04:44, 68.5kB/s]
     55%|#####5    | 22.9M/41.5M [05:33<03:48, 85.4kB/s]
     55%|#####5    | 22.9M/41.5M [05:33<03:43, 87.3kB/s]
     55%|#####5    | 22.9M/41.5M [05:34<03:39, 88.8kB/s]
     55%|#
 ####5    | 23.0M/41.5M [05:34<03:36, 89.9kB/s]
     55%|#####5    | 23.0M/41.5M [05:34<03:34, 90.7kB/s]
     55%|#####5    | 23.0M/41.5M [05:34<03:32, 91.3kB/s]
     55%|#####5    | 23.0M/41.5M [05:34<03:28, 92.7kB/s]
     55%|#####5    | 23.0M/41.5M [05:35<03:28, 92.8kB/s]
     56%|#####5    | 23.0M/41.5M [05:35<03:28, 92.7kB/s]
     56%|#####5    | 23.1M/41.5M [05:35<03:02, 106kB/s] 
     56%|#####5    | 23.1M/41.5M [05:35<03:09, 102kB/s]
     56%|#####5    | 23.1M/41.5M [05:35<03:14, 99.4kB/s]
     56%|#####5    | 23.1M/41.5M [05:36<03:44, 85.8kB/s]
     56%|#####5    | 23.1M/41.5M [05:36<02:54, 110kB/s] 
     56%|#####5    | 23.2M/41.5M [05:36<03:01, 106kB/s]
     56%|#####5    | 23.2M/41.5M [05:36<03:08, 102kB/s]
     56%|#####5    | 23.2M/41.5M [05:36<03:12, 99.5kB/s]
     56%|#####5    | 23.2M/41.5M [05:37<03:16, 97.5kB/s]
     56%|#####5    | 23.2M/41.5M [05:37<03:19, 96.0kB/s]
     56%|#####6    | 23.2M/41.5M [05:37<03:21, 95.0kB/s]
     56%|#####6    | 23.3M/41.5M [05:37<0
 3:37, 88.1kB/s]
     56%|#####6    | 23.3M/41.5M [05:37<03:33, 89.4kB/s]
     56%|#####6    | 23.3M/41.5M [05:37<03:31, 90.3kB/s]
     56%|#####6    | 23.3M/41.5M [05:38<03:29, 91.0kB/s]
     56%|#####6    | 23.3M/41.5M [05:38<03:28, 91.6kB/s]
     56%|#####6    | 23.3M/41.5M [05:38<03:27, 91.9kB/s]
     56%|#####6    | 23.4M/41.5M [05:38<03:11, 99.4kB/s]
     56%|#####6    | 23.4M/41.5M [05:38<03:15, 97.3kB/s]
     56%|#####6    | 23.4M/41.5M [05:38<02:52, 110kB/s] 
     56%|#####6    | 23.4M/41.5M [05:39<02:39, 119kB/s]
     56%|#####6    | 23.4M/41.5M [05:39<02:50, 111kB/s]
     57%|#####6    | 23.5M/41.5M [05:39<02:38, 120kB/s]
     57%|#####6    | 23.5M/41.5M [05:39<02:30, 126kB/s]
     57%|#####6    | 23.5M/41.5M [05:39<02:25, 130kB/s]
     57%|#####6    | 23.5M/41.5M [05:40<02:22, 132kB/s]
     57%|#####6    | 23.5M/41.5M [05:40<03:02, 103kB/s]
     57%|#####6    | 23.6M/41.5M [05:40<02:18, 135kB/s]
     57%|#####6    | 23.6M/41.5M [05:40<02:25, 129kB/s]
     57%|#####6    | 
 23.6M/41.5M [05:40<02:23, 130kB/s]
     57%|#####6    | 23.6M/41.5M [05:40<02:23, 131kB/s]
     57%|#####6    | 23.6M/41.5M [05:41<02:25, 129kB/s]
     57%|#####7    | 23.7M/41.5M [05:41<02:21, 132kB/s]
     57%|#####7    | 23.7M/41.5M [05:41<02:12, 141kB/s]
     57%|#####7    | 23.7M/41.5M [05:41<02:13, 140kB/s]
     57%|#####7    | 23.7M/41.5M [05:41<02:13, 139kB/s]
     57%|#####7    | 23.8M/41.5M [05:41<02:13, 139kB/s]
     57%|#####7    | 23.8M/41.5M [05:42<03:02, 102kB/s]
     57%|#####7    | 23.8M/41.5M [05:42<02:01, 153kB/s]
     57%|#####7    | 23.8M/41.5M [05:42<02:09, 143kB/s]
     58%|#####7    | 23.9M/41.5M [05:42<02:29, 123kB/s]
     58%|#####7    | 23.9M/41.5M [05:43<02:34, 119kB/s]
     58%|#####7    | 23.9M/41.5M [05:43<02:28, 124kB/s]
     58%|#####7    | 23.9M/41.5M [05:43<03:02, 101kB/s]
     58%|#####7    | 24.0M/41.5M [05:43<02:11, 140kB/s]
     58%|#####7    | 24.0M/41.5M [05:43<02:20, 131kB/s]
     58%|#####7    | 24.0M/41.5M [05:44<02:17, 133kB/s]
     58%|#
 ####7    | 24.0M/41.5M [05:44<02:19, 131kB/s]
     58%|#####7    | 24.1M/41.5M [05:44<02:05, 145kB/s]
     58%|#####8    | 24.1M/41.5M [05:44<02:11, 139kB/s]
     58%|#####8    | 24.1M/41.5M [05:44<02:15, 134kB/s]
     58%|#####8    | 24.1M/41.5M [05:44<02:14, 136kB/s]
     58%|#####8    | 24.1M/41.5M [05:44<02:01, 150kB/s]
     58%|#####8    | 24.2M/41.5M [05:45<01:53, 161kB/s]
     58%|#####8    | 24.2M/41.5M [05:45<01:58, 153kB/s]
     58%|#####8    | 24.2M/41.5M [05:45<02:05, 144kB/s]
     58%|#####8    | 24.2M/41.5M [05:45<02:11, 138kB/s]
     58%|#####8    | 24.2M/41.5M [05:45<02:10, 138kB/s]
     58%|#####8    | 24.3M/41.5M [05:45<01:59, 152kB/s]
     59%|#####8    | 24.3M/41.5M [05:45<01:51, 162kB/s]
     59%|#####8    | 24.3M/41.5M [05:46<01:56, 154kB/s]
     59%|#####8    | 24.3M/41.5M [05:46<01:51, 162kB/s]
     59%|#####8    | 24.4M/41.5M [05:46<01:56, 154kB/s]
     59%|#####8    | 24.4M/41.5M [05:46<01:51, 162kB/s]
     59%|#####8    | 24.4M/41.5M [05:46<01:46, 169kB/s]
 
     59%|#####8    | 24.4M/41.5M [05:46<01:42, 175kB/s]
     59%|#####8    | 24.4M/41.5M [05:46<01:41, 177kB/s]
     59%|#####8    | 24.5M/41.5M [05:47<01:43, 172kB/s]
     59%|#####9    | 24.5M/41.5M [05:47<01:47, 166kB/s]
     59%|#####9    | 24.5M/41.5M [05:47<02:17, 129kB/s]
     59%|#####9    | 24.6M/41.5M [05:47<01:38, 181kB/s]
     59%|#####9    | 24.6M/41.5M [05:47<01:44, 169kB/s]
     59%|#####9    | 24.6M/41.5M [05:48<01:49, 162kB/s]
     59%|#####9    | 24.7M/41.5M [05:48<01:44, 169kB/s]
     59%|#####9    | 24.7M/41.5M [05:48<01:41, 174kB/s]
     60%|#####9    | 24.7M/41.5M [05:48<02:21, 124kB/s]
     60%|#####9    | 24.7M/41.5M [05:48<01:55, 152kB/s]
     60%|#####9    | 24.8M/41.5M [05:49<01:42, 171kB/s]
     60%|#####9    | 24.8M/41.5M [05:49<01:48, 162kB/s]
     60%|#####9    | 24.8M/41.5M [05:49<01:52, 155kB/s]
     60%|#####9    | 24.8M/41.5M [05:49<02:14, 130kB/s]
     60%|#####9    | 24.9M/41.5M [05:49<01:57, 148kB/s]
     60%|#####9    | 24.9M/41.5M [05:50<02:3
 9, 109kB/s]
     60%|######    | 24.9M/41.5M [05:50<02:23, 121kB/s]
     60%|######    | 24.9M/41.5M [05:50<02:28, 117kB/s]
     60%|######    | 25.0M/41.5M [05:50<02:37, 110kB/s]
     60%|######    | 25.0M/41.5M [05:50<02:44, 106kB/s]
     60%|######    | 25.0M/41.5M [05:51<02:49, 102kB/s]
     60%|######    | 25.0M/41.5M [05:51<02:37, 109kB/s]
     60%|######    | 25.0M/41.5M [05:51<02:40, 108kB/s]
     60%|######    | 25.0M/41.5M [05:51<02:31, 114kB/s]
     60%|######    | 25.1M/41.5M [05:51<02:35, 111kB/s]
     60%|######    | 25.1M/41.5M [05:52<02:28, 116kB/s]
     61%|######    | 25.1M/41.5M [05:52<02:20, 123kB/s]
     61%|######    | 25.1M/41.5M [05:52<02:26, 117kB/s]
     61%|######    | 25.1M/41.5M [05:52<02:18, 124kB/s]
     61%|######    | 25.2M/41.5M [05:52<02:13, 129kB/s]
     61%|######    | 25.2M/41.5M [05:52<02:09, 132kB/s]
     61%|######    | 25.2M/41.5M [05:53<02:00, 141kB/s]
     61%|######    | 25.2M/41.5M [05:53<02:01, 141kB/s]
     61%|######    | 25.3M/41.5M 
 [05:53<01:59, 142kB/s]
     61%|######    | 25.3M/41.5M [05:53<02:00, 141kB/s]
     61%|######1   | 25.3M/41.5M [05:53<01:49, 155kB/s]
     61%|######1   | 25.4M/41.5M [05:53<01:43, 164kB/s]
     61%|######1   | 25.4M/41.5M [05:54<01:47, 156kB/s]
     61%|######1   | 25.4M/41.5M [05:54<01:42, 165kB/s]
     61%|######1   | 25.4M/41.5M [05:54<01:38, 171kB/s]
     61%|######1   | 25.5M/41.5M [05:54<01:35, 176kB/s]
     61%|######1   | 25.5M/41.5M [05:54<01:33, 179kB/s]
     62%|######1   | 25.5M/41.5M [05:55<01:32, 181kB/s]
     62%|######1   | 25.6M/41.5M [05:55<01:25, 196kB/s]
     62%|######1   | 25.6M/41.5M [05:55<01:20, 207kB/s]
     62%|######1   | 25.7M/41.5M [05:55<01:12, 228kB/s]
     62%|######1   | 25.7M/41.5M [05:55<01:08, 243kB/s]
     62%|######2   | 25.8M/41.5M [05:55<01:01, 268kB/s]
     62%|######2   | 25.8M/41.5M [05:56<00:57, 285kB/s]
     62%|######2   | 25.9M/41.5M [05:56<00:52, 311kB/s]
     63%|######2   | 26.0M/41.5M [05:56<00:45, 356kB/s]
     63%|######2   | 2
 6.0M/41.5M [05:56<00:53, 304kB/s]
     63%|######2   | 26.1M/41.5M [05:56<00:39, 411kB/s]
     63%|######3   | 26.2M/41.5M [05:56<00:40, 399kB/s]
     63%|######3   | 26.2M/41.5M [05:57<00:42, 376kB/s]
     63%|######3   | 26.3M/41.5M [05:57<00:53, 298kB/s]
     64%|######3   | 26.4M/41.5M [05:57<00:41, 386kB/s]
     64%|######3   | 26.4M/41.5M [05:57<00:44, 358kB/s]
     64%|######3   | 26.5M/41.5M [05:58<00:57, 274kB/s]
     64%|######4   | 26.6M/41.5M [05:58<00:47, 329kB/s]
     64%|######4   | 26.6M/41.5M [05:58<00:51, 305kB/s]
     64%|######4   | 26.7M/41.5M [05:58<00:52, 298kB/s]
     64%|######4   | 26.7M/41.5M [05:58<00:55, 280kB/s]
     64%|######4   | 26.8M/41.5M [05:59<00:58, 266kB/s]
     65%|######4   | 26.8M/41.5M [05:59<00:57, 270kB/s]
     65%|######4   | 26.8M/41.5M [05:59<00:56, 272kB/s]
     65%|######4   | 26.9M/41.5M [05:59<00:58, 260kB/s]
     65%|######4   | 26.9M/41.5M [05:59<00:57, 266kB/s]
     65%|######5   | 27.0M/41.5M [05:59<00:56, 269kB/s]
     65%|##
 ####5   | 27.0M/41.5M [06:00<00:55, 272kB/s]
     65%|######5   | 27.1M/41.5M [06:00<00:58, 260kB/s]
     65%|######5   | 27.1M/41.5M [06:00<00:56, 266kB/s]
     65%|######5   | 27.2M/41.5M [06:00<00:55, 269kB/s]
     66%|######5   | 27.2M/41.5M [06:00<00:55, 272kB/s]
     66%|######5   | 27.2M/41.5M [06:01<00:54, 274kB/s]
     66%|######5   | 27.3M/41.5M [06:01<00:51, 289kB/s]
     66%|######5   | 27.4M/41.5M [06:01<00:51, 286kB/s]
     66%|######6   | 27.4M/41.5M [06:01<00:52, 284kB/s]
     66%|######6   | 27.5M/41.5M [06:01<00:49, 296kB/s]
     66%|######6   | 27.5M/41.5M [06:01<00:50, 291kB/s]
     66%|######6   | 27.6M/41.5M [06:02<00:48, 301kB/s]
     67%|######6   | 27.6M/41.5M [06:02<00:47, 308kB/s]
     67%|######6   | 27.7M/41.5M [06:02<00:48, 299kB/s]
     67%|######6   | 27.7M/41.5M [06:02<00:47, 307kB/s]
     67%|######6   | 27.8M/41.5M [06:02<00:46, 312kB/s]
     67%|######7   | 27.8M/41.5M [06:02<00:43, 330kB/s]
     67%|######7   | 27.9M/41.5M [06:03<00:43, 328kB/s]
      67%|######7   | 27.9M/41.5M [06:03<00:43, 327kB/s]
     67%|######7   | 28.0M/41.5M [06:03<00:41, 340kB/s]
     68%|######7   | 28.1M/41.5M [06:03<00:41, 336kB/s]
     68%|######7   | 28.1M/41.5M [06:03<00:40, 346kB/s]
     68%|######7   | 28.2M/41.5M [06:04<00:39, 354kB/s]
     68%|######8   | 28.2M/41.5M [06:04<00:38, 359kB/s]
     68%|######8   | 28.3M/41.5M [06:04<00:35, 390kB/s]
     68%|######8   | 28.4M/41.5M [06:04<00:34, 399kB/s]
     69%|######8   | 28.5M/41.5M [06:04<00:32, 418kB/s]
     69%|######8   | 28.6M/41.5M [06:04<00:30, 446kB/s]
     69%|######9   | 28.6M/41.5M [06:05<00:28, 465kB/s]
     69%|######9   | 28.7M/41.5M [06:05<00:27, 492kB/s]
     70%|######9   | 28.8M/41.5M [06:05<00:25, 526kB/s]
     70%|######9   | 28.9M/41.5M [06:05<00:23, 563kB/s]
     70%|#######   | 29.1M/41.5M [06:05<00:21, 602kB/s]
     70%|#######   | 29.2M/41.5M [06:05<00:20, 631kB/s]
     71%|#######   | 29.3M/41.5M [06:06<00:18, 678kB/s]
     71%|#######   | 29.4M/41.5M [06:06<00:16
 , 756kB/s]
     71%|#######1  | 29.6M/41.5M [06:06<00:15, 829kB/s]
     72%|#######1  | 29.7M/41.5M [06:06<00:14, 854kB/s]
     72%|#######1  | 29.8M/41.5M [06:06<00:14, 824kB/s]
     72%|#######2  | 29.9M/41.5M [06:06<00:14, 856kB/s]
     72%|#######2  | 30.1M/41.5M [06:06<00:12, 962kB/s]
     73%|#######2  | 30.2M/41.5M [06:07<00:16, 706kB/s]
     73%|#######3  | 30.4M/41.5M [06:07<00:11, 1.00MB/s]
     74%|#######3  | 30.5M/41.5M [06:07<00:11, 972kB/s] 
     74%|#######3  | 30.7M/41.5M [06:07<00:11, 1.00MB/s]
     74%|#######4  | 30.8M/41.5M [06:07<00:16, 703kB/s] 
     75%|#######4  | 30.9M/41.5M [06:08<00:13, 819kB/s]
     75%|#######4  | 31.1M/41.5M [06:08<00:14, 770kB/s]
     75%|#######5  | 31.2M/41.5M [06:08<00:14, 735kB/s]
     75%|#######5  | 31.3M/41.5M [06:08<00:14, 756kB/s]
     76%|#######5  | 31.4M/41.5M [06:08<00:13, 783kB/s]
     76%|#######5  | 31.5M/41.5M [06:08<00:17, 591kB/s]
     76%|#######6  | 31.6M/41.5M [06:09<00:14, 715kB/s]
     76%|#######6  | 31.7M/41.
 5M [06:09<00:14, 693kB/s]
     77%|#######6  | 31.8M/41.5M [06:09<00:16, 635kB/s]
     77%|#######6  | 31.9M/41.5M [06:09<00:16, 610kB/s]
     77%|#######7  | 32.0M/41.5M [06:09<00:16, 594kB/s]
     77%|#######7  | 32.1M/41.5M [06:09<00:15, 627kB/s]
     77%|#######7  | 32.1M/41.5M [06:10<00:16, 605kB/s]
     78%|#######7  | 32.2M/41.5M [06:10<00:18, 527kB/s]
     78%|#######7  | 32.3M/41.5M [06:10<00:17, 557kB/s]
     78%|#######7  | 32.4M/41.5M [06:10<00:16, 592kB/s]
     78%|#######8  | 32.4M/41.5M [06:10<00:15, 630kB/s]
     78%|#######8  | 32.5M/41.5M [06:10<00:15, 606kB/s]
     79%|#######8  | 32.6M/41.5M [06:10<00:16, 562kB/s]
     79%|#######8  | 32.7M/41.5M [06:11<00:15, 598kB/s]
     79%|#######8  | 32.8M/41.5M [06:11<00:14, 637kB/s]
     79%|#######9  | 32.8M/41.5M [06:11<00:14, 609kB/s]
     79%|#######9  | 32.9M/41.5M [06:11<00:16, 563kB/s]
     79%|#######9  | 33.0M/41.5M [06:11<00:14, 600kB/s]
     80%|#######9  | 33.1M/41.5M [06:11<00:13, 639kB/s]
     80%|#######9  
 | 33.1M/41.5M [06:11<00:14, 609kB/s]
     80%|#######9  | 33.2M/41.5M [06:11<00:15, 563kB/s]
     80%|########  | 33.3M/41.5M [06:12<00:14, 600kB/s]
     80%|########  | 33.4M/41.5M [06:12<00:13, 640kB/s]
     81%|########  | 33.4M/41.5M [06:12<00:13, 609kB/s]
     81%|########  | 33.5M/41.5M [06:12<00:14, 563kB/s]
     81%|########  | 33.6M/41.5M [06:12<00:13, 618kB/s]
     81%|########1 | 33.7M/41.5M [06:12<00:12, 652kB/s]
     81%|########1 | 33.7M/41.5M [06:12<00:13, 619kB/s]
     81%|########1 | 33.8M/41.5M [06:13<00:14, 569kB/s]
     82%|########1 | 33.9M/41.5M [06:13<00:13, 605kB/s]
     82%|########1 | 34.0M/41.5M [06:13<00:12, 642kB/s]
     82%|########2 | 34.0M/41.5M [06:13<00:12, 612kB/s]
     82%|########2 | 34.1M/41.5M [06:13<00:13, 565kB/s]
     82%|########2 | 34.2M/41.5M [06:13<00:12, 601kB/s]
     83%|########2 | 34.3M/41.5M [06:13<00:11, 641kB/s]
     83%|########2 | 34.3M/41.5M [06:13<00:12, 610kB/s]
     83%|########2 | 34.4M/41.5M [06:14<00:12, 582kB/s]
     83%
 |########3 | 34.5M/41.5M [06:14<00:11, 628kB/s]
     83%|########3 | 34.6M/41.5M [06:14<00:10, 663kB/s]
     84%|########3 | 34.7M/41.5M [06:14<00:11, 645kB/s]
     84%|########3 | 34.7M/41.5M [06:14<00:12, 590kB/s]
     84%|########3 | 34.8M/41.5M [06:14<00:11, 635kB/s]
     84%|########4 | 34.9M/41.5M [06:14<00:10, 640kB/s]
     84%|########4 | 35.0M/41.5M [06:15<00:09, 701kB/s]
     85%|########4 | 35.1M/41.5M [06:15<00:09, 673kB/s]
     85%|########4 | 35.2M/41.5M [06:15<00:10, 629kB/s]
     85%|########5 | 35.3M/41.5M [06:15<00:09, 680kB/s]
     85%|########5 | 35.4M/41.5M [06:15<00:12, 502kB/s]
     86%|########5 | 35.5M/41.5M [06:15<00:09, 688kB/s]
     86%|########5 | 35.6M/41.5M [06:15<00:09, 672kB/s]
     86%|########6 | 35.7M/41.5M [06:16<00:09, 672kB/s]
     86%|########6 | 35.8M/41.5M [06:16<00:09, 652kB/s]
     86%|########6 | 35.8M/41.5M [06:16<00:10, 570kB/s]
     87%|########6 | 35.9M/41.5M [06:16<00:10, 577kB/s]
     87%|########6 | 36.0M/41.5M [06:16<00:09, 637kB/
 s]
     87%|########7 | 36.1M/41.5M [06:16<00:08, 665kB/s]
     87%|########7 | 36.2M/41.5M [06:16<00:08, 646kB/s]
     87%|########7 | 36.2M/41.5M [06:17<00:09, 593kB/s]
     88%|########7 | 36.3M/41.5M [06:17<00:08, 615kB/s]
     88%|########7 | 36.5M/41.5M [06:17<00:08, 642kB/s]
     88%|########8 | 36.6M/41.5M [06:17<00:07, 660kB/s]
     88%|########8 | 36.7M/41.5M [06:17<00:07, 671kB/s]
     89%|########8 | 36.8M/41.5M [06:17<00:06, 733kB/s]
     89%|########8 | 36.9M/41.5M [06:18<00:08, 560kB/s]
     89%|########9 | 37.0M/41.5M [06:18<00:06, 699kB/s]
     89%|########9 | 37.1M/41.5M [06:18<00:07, 641kB/s]
     90%|########9 | 37.2M/41.5M [06:18<00:07, 613kB/s]
     90%|########9 | 37.3M/41.5M [06:18<00:07, 584kB/s]
     90%|######### | 37.4M/41.5M [06:19<00:07, 576kB/s]
     90%|######### | 37.5M/41.5M [06:19<00:07, 584kB/s]
     91%|######### | 37.6M/41.5M [06:19<00:07, 576kB/s]
     91%|######### | 37.7M/41.5M [06:19<00:06, 584kB/s]
     91%|#########1| 37.8M/41.5M [06:19<00
 :06, 590kB/s]
     91%|#########1| 37.9M/41.5M [06:19<00:06, 594kB/s]
     92%|#########1| 38.0M/41.5M [06:20<00:06, 596kB/s]
     92%|#########1| 38.1M/41.5M [06:20<00:07, 494kB/s]
     92%|#########2| 38.2M/41.5M [06:20<00:05, 616kB/s]
     92%|#########2| 38.3M/41.5M [06:20<00:06, 558kB/s]
     92%|#########2| 38.3M/41.5M [06:20<00:06, 530kB/s]
     93%|#########2| 38.4M/41.5M [06:20<00:06, 521kB/s]
     93%|#########2| 38.5M/41.5M [06:21<00:05, 520kB/s]
     93%|#########3| 38.6M/41.5M [06:21<00:05, 517kB/s]
     93%|#########3| 38.7M/41.5M [06:21<00:05, 529kB/s]
     93%|#########3| 38.8M/41.5M [06:21<00:05, 537kB/s]
     94%|#########3| 38.9M/41.5M [06:21<00:05, 543kB/s]
     94%|#########3| 39.0M/41.5M [06:22<00:04, 547kB/s]
     94%|#########4| 39.1M/41.5M [06:22<00:04, 550kB/s]
     94%|#########4| 39.2M/41.5M [06:22<00:04, 566kB/s]
     95%|#########4| 39.3M/41.5M [06:22<00:04, 577kB/s]
     95%|#########4| 39.4M/41.5M [06:22<00:03, 585kB/s]
     95%|#########5| 39.5M/41.5
 M [06:22<00:03, 590kB/s]
     95%|#########5| 39.6M/41.5M [06:23<00:03, 594kB/s]
     96%|#########5| 39.7M/41.5M [06:23<00:03, 611kB/s]
     96%|#########5| 39.8M/41.5M [06:23<00:02, 608kB/s]
     96%|#########6| 39.9M/41.5M [06:23<00:02, 607kB/s]
     96%|#########6| 40.0M/41.5M [06:23<00:02, 606kB/s]
     97%|#########6| 40.1M/41.5M [06:23<00:02, 619kB/s]
     97%|#########6| 40.2M/41.5M [06:24<00:02, 614kB/s]
     97%|#########7| 40.3M/41.5M [06:24<00:02, 611kB/s]
     97%|#########7| 40.4M/41.5M [06:24<00:01, 608kB/s]
     98%|#########7| 40.5M/41.5M [06:24<00:01, 607kB/s]
     98%|#########7| 40.6M/41.5M [06:24<00:01, 620kB/s]
     98%|#########8| 40.7M/41.5M [06:25<00:01, 615kB/s]
     98%|#########8| 40.8M/41.5M [06:25<00:01, 611kB/s]
     99%|#########8| 40.9M/41.5M [06:25<00:00, 623kB/s]
     99%|#########8| 41.0M/41.5M [06:25<00:00, 617kB/s]
     99%|#########9| 41.1M/41.5M [06:25<00:00, 613kB/s]
     99%|#########9| 41.2M/41.5M [06:25<00:00, 624kB/s]
    100%|#########9|
  41.3M/41.5M [06:26<00:00, 512kB/s]
    100%|#########9| 41.4M/41.5M [06:26<00:00, 604kB/s]
    100%|##########| 41.5M/41.5M [06:26<00:00, 113kB/s]
 
 
 
@@ -283,6 +283,11 @@ Look up prediction top 1 index in 1000 class synset.
 
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 6 minutes  50.106 seconds)
+
+
 .. _sphx_glr_download_how_to_compile_models_from_oneflow.py:
 
 
diff --git a/docs/_sources/how_to/compile_models/from_paddle.rst.txt b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
index be1dffda6..681b21e4a 100644
--- a/docs/_sources/how_to/compile_models/from_paddle.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
@@ -201,7 +201,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  23.283 seconds)
+   **Total running time of the script:** ( 1 minutes  4.115 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_paddle.py:
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 7cb90a696..9de3c9a8f 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -79,7 +79,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     10%|#         | 4.61M/44.7M [00:00<00:00, 48.2MB/s]
     42%|####2     | 18.8M/44.7M [00:00<00:00, 107MB/s] 
     73%|#######3  | 32.8M/44.7M [00:00<00:00, 125MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 128MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     53%|#####2    | 23.6M/44.7M [00:00<00:00, 248MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 272MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 1f99017ad..43d9b260b 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -370,11 +370,6 @@ Run the corresponding model on tensorflow
 
 
 
-.. rst-class:: sphx-glr-timing
-
-   **Total running time of the script:** ( 1 minutes  2.150 seconds)
-
-
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
 
 
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 3be9b8044..f710f77c7 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,15 +5,15 @@
 
 Computation times
 =================
-**05:35.207** total execution time for **how_to_compile_models** files:
+**11:30.048** total execution time for **how_to_compile_models** files:
 
-- **01:23.283**: :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)
-- **01:02.150**: :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``)
-- **00:55.986**: :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)
-- **00:31.428**: :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)
-- **00:24.838**: :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)
-- **00:21.133**: :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)
-- **00:20.815**: :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)
-- **00:19.230**: :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)
-- **00:13.473**: :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)
-- **00:02.871**: :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)
+- **06:50.106**: :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)
+- **01:04.115**: :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)
+- **00:59.156**: :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``)
+- **00:56.570**: :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)
+- **00:24.524**: :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)
+- **00:20.600**: :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)
+- **00:20.526**: :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)
+- **00:19.185**: :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)
+- **00:12.821**: :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)
+- **00:02.445**: :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 34e8baa66..0d532ebad 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -393,7 +393,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.1288      16.1018      16.3555      16.0681       0.0847   
+      15.8762      15.5255      16.7675      15.4631       0.4818   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 9eb2b8fbf..aaf6b010b 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -108,7 +108,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
      1%|1         | 1.94M/170M [00:00<00:08, 20.0MB/s]
      2%|2         | 3.84M/170M [00:00<00:09, 17.7MB/s]
      4%|3         | 6.73M/170M [00:00<00:07, 23.1MB/s]
      5%|5         | 9.01M/170M [00:00<00:07, 23.1MB/s]
      7%|6         | 11.9M/170M [00:00<00:06, 25.4MB/s]
      8%|8         | 14.3M/170M [00:00<00:06, 25.0MB/s]
     10%|9         | 16.9M/170M [00:00<00:06, 25.6MB/s]
     12%|#1        | 20.1M/170M [00:00<00:05, 28.0MB/s]
     13%|#3        | 22.8M/170M [00:01<00:06, 22.9MB/s]
     15%|#4        | 25.1M/170M [00:01<00:07, 21.3MB/s]
     16%|#6        | 27.5M/170M [00:01<00:06, 22.0MB/s]
     17%|#7        | 29.7M/170M [00:01<00:06, 21.7MB/s]
     20%|#9        | 33.4M/170M [00:01<00:05, 26.2MB/s]
     21%|##1       | 36.1M/170M [00:01<00:05, 27.0MB/s]
     23%|##2       | 38.8M/170M [00:01<00:05, 26.5MB/s]
     25%|##4       | 42.4M/170M [00:01<00:04, 29.4MB/s]
     28%|##7       | 47.1M/170M [00:01<00:03, 35.2MB/
 s]
     30%|##9       | 50.7M/170M [00:01<00:03, 35.5MB/s]
     32%|###2      | 55.1M/170M [00:02<00:03, 38.8MB/s]
     35%|###4      | 58.9M/170M [00:02<00:03, 35.9MB/s]
     37%|###6      | 62.4M/170M [00:02<00:03, 31.1MB/s]
     39%|###8      | 65.5M/170M [00:02<00:03, 31.3MB/s]
     40%|####      | 68.5M/170M [00:02<00:03, 29.8MB/s]
     42%|####2     | 72.1M/170M [00:02<00:03, 31.7MB/s]
     44%|####4     | 75.2M/170M [00:02<00:03, 26.7MB/s]
     46%|####5     | 77.9M/170M [00:03<00:04, 22.9MB/s]
     48%|####8     | 82.3M/170M [00:03<00:03, 27.9MB/s]
     51%|#####     | 86.0M/170M [00:03<00:02, 30.6MB/s]
     52%|#####2    | 89.2M/170M [00:03<00:03, 28.1MB/s]
     55%|#####4    | 92.7M/170M [00:03<00:02, 29.5MB/s]
     56%|#####6    | 95.6M/170M [00:03<00:03, 25.3MB/s]
     58%|#####8    | 98.7M/170M [00:03<00:02, 26.4MB/s]
     60%|#####9    | 102M/170M [00:03<00:02, 27.7MB/s] 
     62%|######2   | 106M/170M [00:03<00:02, 30.6MB/s]
     65%|######4   | 110M/170M [00:04<00:01
 , 32.0MB/s]
     67%|######6   | 113M/170M [00:04<00:01, 33.6MB/s]
     69%|######8   | 117M/170M [00:04<00:01, 35.2MB/s]
     71%|#######   | 120M/170M [00:04<00:01, 35.2MB/s]
     73%|#######3  | 125M/170M [00:04<00:01, 37.7MB/s]
     76%|#######5  | 128M/170M [00:04<00:01, 29.8MB/s]
     77%|#######7  | 131M/170M [00:04<00:01, 27.2MB/s]
     80%|########  | 136M/170M [00:04<00:01, 33.1MB/s]
     83%|########2 | 140M/170M [00:05<00:00, 34.5MB/s]
     85%|########4 | 144M/170M [00:05<00:00, 32.5MB/s]
     87%|########6 | 147M/170M [00:05<00:00, 27.3MB/s]
     88%|########8 | 150M/170M [00:05<00:00, 25.0MB/s]
     90%|########9 | 152M/170M [00:05<00:00, 25.0MB/s]
     91%|#########1| 155M/170M [00:05<00:00, 24.2MB/s]
     93%|#########2| 158M/170M [00:05<00:00, 26.2MB/s]
     95%|#########4| 161M/170M [00:05<00:00, 24.7MB/s]
     96%|#########5| 163M/170M [00:06<00:00, 24.3MB/s]
     97%|#########7| 165M/170M [00:06<00:00, 24.0MB/s]
     99%|#########8| 168M/170M [00:06<00:00, 22.8M
 B/s]
    100%|##########| 170M/170M [00:06<00:00, 27.9MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
     11%|#1        | 19.3M/170M [00:00<00:00, 203MB/s]
     27%|##6       | 45.8M/170M [00:00<00:00, 246MB/s]
     42%|####2     | 72.1M/170M [00:00<00:00, 260MB/s]
     58%|#####7    | 98.3M/170M [00:00<00:00, 266MB/s]
     73%|#######3  | 124M/170M [00:00<00:00, 269MB/s] 
     88%|########8 | 150M/170M [00:00<00:00, 269MB/s]
    100%|##########| 170M/170M [00:00<00:00, 264MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -253,7 +253,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  10.453 seconds)
+   **Total running time of the script:** ( 2 minutes  56.175 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index b63f1ae82..fad356b93 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -187,7 +187,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     10%|#         | 1.38M/13.6M [00:00<00:00, 14.5MB/s]
     33%|###3      | 4.54M/13.6M [00:00<00:00, 25.4MB/s]
     51%|#####1    | 6.97M/13.6M [00:00<00:00, 24.1MB/s]
     73%|#######3  | 9.90M/13.6M [00:00<00:00, 25.3MB/s]
     91%|######### | 12.3M/13.6M [00:00<00:00, 24.2MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 23.7MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 167MB/s]
 
 
 
@@ -344,7 +344,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.2412      90.2071      90.8011      90.0909       0.1241   
+      90.1441      90.0327      91.7263      89.8729       0.2893   
                
 
 
@@ -384,7 +384,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  5.816 seconds)
+   **Total running time of the script:** ( 1 minutes  3.230 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 847d33e8a..323045be1 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -351,7 +351,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      117.0151     116.9941     119.1223     115.3432      0.9252   
+      117.5831     117.4965     119.9891     116.6112      0.6789   
                
 
 
@@ -385,7 +385,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  58.607 seconds)
+   **Total running time of the script:** ( 1 minutes  55.881 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 13df07cbd..add212834 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -221,7 +221,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  21.769 seconds)
+   **Total running time of the script:** ( 1 minutes  7.308 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 1ec8f3c79..8723cf707 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -137,7 +137,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      5%|5         | 6999/132723 [00:00<00:01, 69969.38KB/s]
     12%|#1        | 15727/132723 [00:00<00:01, 80147.99KB/s]
     19%|#8        | 24560/132723 [00:00<00:01, 83881.04KB/s]
     25%|##4       | 32949/132723 [00:00<00:01, 80221.28KB/s]
     31%|###1      | 41794/132723 [00:00<00:01, 83102.74KB/s]
     38%|###8      | 50653/132723 [00:00<00:00, 84931.94KB/s]
     45%|####4     | 59479/132723 [00:00<00:00, 86003.43KB/s]
     52%|#####1    | 68362/132723 [00:00<00:00, 86889.43KB/s]
     58%|#####8    | 77304/132723 [00:00<00:00, 87674.86KB/s]
     65%|######4   | 86187/132723 [00:01<00:00, 88027.99KB/s]
     72%|#######1  | 95042/132723 [00:01<00:00, 88183.97KB/s]
     78%|#######8  | 103982/132723 [00:01<00:00, 88551.68KB/s]
     85%|########5 | 112918/132723 [00:01<00:00, 88793.65KB/s]
     92%|#########1| 121800/132723 [00:01<00:00, 88484.18KB/s]
     98%|#########8| 130681/132723 [00:01<00:00, 88579.50KB/s]
    100%|#######
 ###| 132723/132723 [00:01<00:00, 86307.82KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      5%|5         | 6757/132723 [00:00<00:01, 67559.27KB/s]
     12%|#1        | 15560/132723 [00:00<00:01, 79595.33KB/s]
     18%|#8        | 24463/132723 [00:00<00:01, 83901.77KB/s]
     25%|##5       | 33310/132723 [00:00<00:01, 85702.78KB/s]
     32%|###1      | 42202/132723 [00:00<00:01, 86861.86KB/s]
     38%|###8      | 51061/132723 [00:00<00:00, 87447.02KB/s]
     45%|####5     | 59862/132723 [00:00<00:00, 87630.07KB/s]
     52%|#####1    | 68626/132723 [00:00<00:00, 87538.91KB/s]
     58%|#####8    | 77439/132723 [00:00<00:00, 87717.76KB/s]
     65%|######4   | 86267/132723 [00:01<00:00, 87889.71KB/s]
     72%|#######1  | 95173/132723 [00:01<00:00, 88246.73KB/s]
     78%|#######8  | 104014/132723 [00:01<00:00, 88287.51KB/s]
     85%|########5 | 112862/132723 [00:01<00:00, 88340.22KB/s]
     92%|#########1| 121697/132723 [00:01<00:00, 88298.19KB/s]
     98%|#########8| 130598/132723 [00:01<00:00, 88509.55KB/s]
    100%|#######
 ###| 132723/132723 [00:01<00:00, 86935.39KB/s]
 
 
 
@@ -202,7 +202,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  17.852 seconds)
+   **Total running time of the script:** ( 2 minutes  17.619 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index aa8771b2f..81cf31b98 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**11:45.672** total execution time for **how_to_deploy_models** files:
+**10:09.203** total execution time for **how_to_deploy_models** files:
 
-- **03:10.453**: :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
-- **02:21.769**: :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)
-- **02:17.852**: :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
-- **01:58.607**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
-- **01:05.816**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)
-- **00:28.562**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)
-- **00:22.433**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
-- **00:00.181**: :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)
+- **02:56.175**: :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
+- **02:17.619**: :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
+- **01:55.881**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
+- **01:07.308**: :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)
+- **01:03.230**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)
+- **00:27.226**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)
+- **00:21.587**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
+- **00:00.176**: :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 6885583b1..a3b9239d8 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -423,7 +423,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip094144d5-fdd2-4b2b-9231-75f8c8ece3c7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip7d4ccf53-5f97-4de0-9c7a-502154829388 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
@@ -525,7 +525,7 @@ Now, to actually convert the entire network, we have written `a pass in Relay <h
 
  .. code-block:: none
 
-      Check failed: (lower) is false: Intrinsic lowering function for target llvm, intrinsic name tir.sqrt, type 150 not found
+      Check failed: (lower) is false: FloatImm lowering function for target llvm type 150 not found
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 8ed00780a..b60d3f7ae 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,9 +5,9 @@
 
 Computation times
 =================
-**00:37.308** total execution time for **how_to_extend_tvm** files:
+**00:36.948** total execution time for **how_to_extend_tvm** files:
 
-- **00:33.934**: :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
-- **00:02.178**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)
-- **00:01.008**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)
-- **00:00.187**: :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)
+- **00:33.587**: :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
+- **00:02.171**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)
+- **00:01.009**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)
+- **00:00.181**: :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 55981273a..d6c79f29f 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -199,10 +199,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 5844us [5844us] (45.16%; 45.16%)
-    FoldScaleAxis: 7098us [2us] (54.84%; 54.84%)
-            FoldConstant: 7096us [1481us] (54.83%; 99.97%)
-                    InferType: 5615us [5615us] (43.39%; 79.13%)
+    InferType: 6108us [6108us] (45.64%; 45.64%)
+    FoldScaleAxis: 7276us [2us] (54.36%; 54.36%)
+            FoldConstant: 7273us [1522us] (54.35%; 99.97%)
+                    InferType: 5751us [5751us] (42.97%; 79.07%)
 
 
 
@@ -239,10 +239,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 5678us [5678us] (44.65%; 44.65%)
-    FoldScaleAxis: 7040us [2us] (55.35%; 55.35%)
-            FoldConstant: 7038us [1455us] (55.34%; 99.98%)
-                    InferType: 5583us [5583us] (43.90%; 79.32%)
+    InferType: 5810us [5810us] (44.54%; 44.54%)
+    FoldScaleAxis: 7233us [2us] (55.46%; 55.46%)
+            FoldConstant: 7232us [1511us] (55.44%; 99.98%)
+                    InferType: 5721us [5721us] (43.86%; 79.11%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 80660e445..91faf28bf 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -295,7 +295,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 35.339016 ms
+    Convolution: 40.653210 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 3350e6edb..ab3720b96 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -628,7 +628,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 8.956960 ms
+    conv2d with tensor core: 10.205289 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index b85cc644c..a580839ab 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -118,8 +118,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.018245
-    Baseline: 3.306084
+    Numpy running time: 0.017612
+    Baseline: 3.390872
 
 
 
@@ -210,7 +210,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.289051
+    Opt1: 0.307004
 
 
 
@@ -309,7 +309,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.329916
+    Opt2: 0.338097
 
 
 
@@ -401,7 +401,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.117861
+    Opt3: 0.112429
 
 
 
@@ -520,7 +520,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.110710
+    Opt4: 0.110156
 
 
 
@@ -638,7 +638,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.111893
+    Opt5: 0.111282
 
 
 
@@ -759,7 +759,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.146405
+    Opt6: 0.144558
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 9277c2f4b..e0b14a4fc 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:34.502** total execution time for **how_to_optimize_operators** files:
+**00:34.835** total execution time for **how_to_optimize_operators** files:
 
-- **00:31.909**: :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)
-- **00:01.424**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
-- **00:01.169**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)
+- **00:32.206**: :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)
+- **00:01.432**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
+- **00:01.198**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 8db986d25..c6b9dbb1e 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
 
 Computation times
 =================
-**05:00.449** total execution time for **how_to_tune_with_autoscheduler** files:
-
-- **02:28.084**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
-- **01:18.796**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)
-- **00:39.963**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)
-- **00:17.032**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)
-- **00:08.373**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)
-- **00:08.201**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)
+**04:50.844** total execution time for **how_to_tune_with_autoscheduler** files:
+
+- **02:19.891**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
+- **01:17.690**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)
+- **00:39.503**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)
+- **00:16.807**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)
+- **00:08.768**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)
+- **00:08.185**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index e5456e9c1..0a2d3d624 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -223,11 +223,11 @@ cooperative fetching, unrolling and operator fusion.
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
       attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 32;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [16]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [16], [], scope="local", align=64)[0] = 0f32
+      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [1568]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [512]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[2] = 0f32
         conv2d_nchw_1[3] = 0f32
@@ -241,943 +241,74 @@ cooperative fetching, unrolling and operator fusion.
         conv2d_nchw_1[11] = 0f32
         conv2d_nchw_1[12] = 0f32
         conv2d_nchw_1[13] = 0f32
-        conv2d_nchw_1[14] = 0f32
-        conv2d_nchw_1[15] = 0f32
         for (rc.outer.outer: int32, 0, 16) {
-          for (rx.outer.outer: int32, 0, 3) {
-            let cse_var_2: int32 = (rc.outer.outer*1568)
-            let cse_var_1: int32 = (rc.outer.outer*288)
-             {
-              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 49)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 7), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtyp [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 14), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dty [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 147)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 21), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 28), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 245)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 35), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 42), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 343)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 49), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 56), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 441)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 335)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 70), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 539)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 77), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 84), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 637)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 91), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 98), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 735)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 105), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 112), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 833)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 119), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 678)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 931)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 133), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 140), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1029)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 147), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 154), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1127)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 161), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 168), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1225)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 175), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 182), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1323)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1021)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 196), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1421)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 203), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 210), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1519)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 217), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 224), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1617)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 231), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 238), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1715)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 245), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1364)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1813)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 259), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 266), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1911)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 273), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32,  [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 280), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              if @tir.likely((threadIdx.x_1 < 7), dtype=bool) {
-                pad_temp.shared_1[(threadIdx.x_1 + 2009)] = 0f32
-              }
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope="shared")[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 49)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 49), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 49), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 98)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 98), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 2), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 147)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 147), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 51), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 196)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 196), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 4), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 245)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 245), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 53), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 294)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 294), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 6), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 343)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 343), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 55), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 392), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 8), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 441)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 441), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 57), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 490)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 490), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 10), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 539)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 539), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 59), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 588)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 588), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 12), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 637)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 637), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 61), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 686)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 686), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 14), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 735)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 735), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 63), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 16), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 833)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 833), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 65), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 882)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 882), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 18), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 931)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 931), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 67), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 980)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 980), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 20), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1029)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1029), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 69), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1078)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1078), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 22), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1127)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1127), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 71), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1176), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 24), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1225)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1225), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 73), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1274)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1274), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 26), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1323)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1323), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 75), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1372)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1372), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 28), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1421)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1421), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 77), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              kernel.shared_1[(threadIdx.x_2 + 1470)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1470), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 30), 96)*3)) + rx.outer.outer)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
-              if @tir.likely((threadIdx.x_2 < 17), dtype=bool) {
-                kernel.shared_1[(threadIdx.x_2 + 1519)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1519), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 79), 96)*3)) + rx.outer.outer)]
-              }
-              for (rc.outer.inner: int32, 0, 2) {
-                let cse_var_3: int32 = (rc.outer.inner*48)
-                 {
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[cse_var_3]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 96)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 192)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 288)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 97)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 193)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 289)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 2)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 98)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 194)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 290)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 3)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 99)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 195)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 291)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 4)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 100)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 196)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 292)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 5)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 101)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 197)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 293)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 6)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 102)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 198)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 294)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 7)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 103)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 199)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 295)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 8)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 104)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 200)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 296)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 9)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 105)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 201)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 297)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 10)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 106)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 202)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 298)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 11)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 107)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 203)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 299)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 12)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 108)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 204)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 300)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 13)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 109)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 205)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 301)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 14)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 110)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 206)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 302)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 15)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 111)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 207)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 303)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 16)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 112)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 208)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 304)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 17)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 113)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 209)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 305)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 18)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 114)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 210)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 306)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 19)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 115)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 211)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 307)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 20)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 116)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 212)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 308)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 21)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 117)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 213)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 309)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 22)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 118)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 214)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 310)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 23)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 119)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 215)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 311)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 24)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 120)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 216)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 312)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 25)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 121)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 217)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 313)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 26)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 122)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 218)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 314)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 27)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 123)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 219)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 315)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 28)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 124)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 220)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 316)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 29)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 125)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 221)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 317)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 30)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 126)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 222)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 318)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 31)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 127)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 223)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 319)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 32)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 128)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 224)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 320)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 33)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 129)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 225)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 321)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 34)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 130)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 226)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 322)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 35)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 131)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 227)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 323)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 36)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 132)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 228)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 324)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 37)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 133)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 229)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 325)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 38)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 134)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 230)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 326)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 39)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 135)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 231)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 327)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 40)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 136)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 232)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 328)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 41)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 137)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 233)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 329)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 42)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 138)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 234)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 330)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 43)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 139)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 235)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 331)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 44)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 140)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 236)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 332)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 45)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 141)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 237)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 333)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 46)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 142)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 238)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 334)]))
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 47)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 143)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 239)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 335)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 384)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 480)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 576)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 672)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 385)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 481)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 577)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 673)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 386)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 482)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 578)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 674)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 387)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 483)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 579)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 675)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 388)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 484)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 580)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 676)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 389)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 485)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 581)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 677)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 390)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 486)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 582)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 678)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 391)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 487)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 583)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 679)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 392)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 488)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 584)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 680)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 393)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 489)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 585)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 681)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 394)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 490)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 586)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 682)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 395)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 491)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 587)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 683)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 396)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 492)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 588)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 684)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 397)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 493)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 589)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 685)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 398)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 494)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 590)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 686)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 399)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 495)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 591)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 687)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 400)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 496)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 592)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 688)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 401)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 497)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 593)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 689)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 402)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 498)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 594)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 690)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 403)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 499)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 595)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 691)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 404)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 500)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 596)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 692)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 405)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 501)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 597)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 693)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 406)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 502)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 598)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 694)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 407)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 503)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 599)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 695)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 408)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 504)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 600)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 696)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 409)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 505)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 601)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 697)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 410)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 506)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 602)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 698)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 411)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 507)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 603)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 699)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 412)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 508)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 604)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 700)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 413)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 509)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 605)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 701)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 414)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 510)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 606)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 702)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 415)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 511)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 607)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 703)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 416)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 512)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 608)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 704)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 417)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 513)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 609)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 705)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 418)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 514)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 610)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 706)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 419)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 515)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 611)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 707)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 420)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 516)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 612)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 708)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 421)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 517)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 613)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 709)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 422)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 518)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 614)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 710)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 423)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 519)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 615)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 711)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 424)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 520)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 616)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 712)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 425)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 521)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 617)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 713)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 426)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 522)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 618)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 714)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 427)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 523)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 619)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 715)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 428)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 524)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 620)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 716)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 429)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 525)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 621)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 717)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 430)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 526)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 622)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 718)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 431)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 527)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 623)]))
-                  conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 719)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 768)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 864)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 960)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1056)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 769)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 865)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 961)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1057)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 770)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 866)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 962)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1058)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 771)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 867)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 963)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1059)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 772)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 868)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 964)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1060)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 773)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 869)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 965)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1061)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 774)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 870)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 966)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1062)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 775)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 871)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 967)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1063)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 776)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 872)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 968)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1064)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 777)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 873)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 969)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1065)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 778)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 874)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 970)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1066)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 779)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 875)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 971)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1067)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 780)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 876)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 972)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1068)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 781)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 877)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 973)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1069)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 782)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 878)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 974)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1070)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 783)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 879)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 975)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1071)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 784)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 880)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 976)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1072)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 785)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 881)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 977)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1073)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 786)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 882)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 978)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1074)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 787)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 883)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 979)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1075)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 788)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 884)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 980)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1076)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 789)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 885)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 981)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1077)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 790)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 886)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 982)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1078)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 791)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 887)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 983)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1079)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 792)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 888)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 984)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1080)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 793)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 889)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 985)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1081)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 794)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 890)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 986)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1082)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 795)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 891)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 987)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1083)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 796)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 892)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 988)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1084)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 797)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 893)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 989)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1085)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 798)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 894)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 990)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1086)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 799)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 895)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 991)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1087)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 800)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 896)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 992)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1088)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 801)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 897)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 993)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1089)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 802)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 898)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 994)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1090)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 803)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 899)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 995)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1091)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 804)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 900)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 996)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1092)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 805)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 901)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 997)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1093)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 806)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 902)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 998)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1094)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 807)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 903)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 999)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1095)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 808)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 904)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1000)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1096)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 809)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 905)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1001)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1097)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 810)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 906)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1002)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1098)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 811)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 907)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1003)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1099)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 812)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 908)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1004)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1100)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 813)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 909)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1005)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1101)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 814)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 910)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1006)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1102)]))
-                  conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 815)]))
-                  conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 911)]))
-                  conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1007)]))
-                  conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1103)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1152)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1248)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1344)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1440)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1153)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1249)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1345)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1441)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1154)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1250)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1346)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1442)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1155)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1251)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1347)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1443)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1156)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1252)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1348)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1444)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1157)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1253)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1349)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1445)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1158)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1254)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1350)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1446)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1159)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1255)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1351)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1447)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1160)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1256)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1352)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1448)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1161)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1257)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1353)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1449)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1162)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1258)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1354)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1450)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1163)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1259)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1355)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1451)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1164)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1260)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1356)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1452)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1165)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1261)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1357)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1453)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1166)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1262)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1358)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1454)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1167)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1263)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1359)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1455)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1168)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1264)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1360)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1456)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1169)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1265)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1361)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1457)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1170)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1266)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1362)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1458)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1171)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1267)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1363)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1459)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1172)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1268)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1364)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1460)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1173)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1269)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1365)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1461)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1174)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1270)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1366)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1462)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1175)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1271)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1367)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1463)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1176)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1272)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1368)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1464)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1177)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1273)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1369)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1465)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1178)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1274)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1370)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1466)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1179)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1275)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1371)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1467)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1180)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1276)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1372)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1468)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1181)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1277)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1373)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1469)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1182)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1278)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1374)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1470)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1183)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1279)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1375)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1471)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1184)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1280)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1376)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1472)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1185)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1281)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1377)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1473)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1186)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1282)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1378)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1474)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1187)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1283)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1379)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1475)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1188)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1284)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1380)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1476)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1189)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1285)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1381)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1477)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1190)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1286)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1382)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1478)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1191)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1287)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1383)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1479)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1192)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1288)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1384)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1480)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1193)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1289)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1385)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1481)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1194)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1290)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1386)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1482)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1195)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1291)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1387)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1483)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1196)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1292)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1388)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1484)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1197)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1293)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1389)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1485)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1198)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1294)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1390)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1486)]))
-                  conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1199)]))
-                  conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1295)]))
-                  conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1391)]))
-                  conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1487)]))
+          for (ry.outer.outer: int32, 0, 3) {
+            for (rx.outer.outer: int32, 0, 3) {
+              let cse_var_2: int32 = (rc.outer.outer*288)
+              let cse_var_1: int32 = (ry.outer.outer*3)
+               {
+                for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 28) {
+                  let cse_var_3: int32 = (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56)
+                  attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                  pad_temp.shared_1: Buffer(pad_temp.shared, float32, [1568], [], scope="shared")[(cse_var_3 + threadIdx.x_1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7))) && ((ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7)) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x [...]
+                }
+                attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1: Buffer(kernel.shared, float32, [512], [], scope="shared")[threadIdx.x_2] = kernel[((((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 32)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 7), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 14), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 21), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 32256)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 35), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 42), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 49), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 64512)]
+                attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+                if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
+                  kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((((blockIdx.x*73728) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer) + 69120)]
+                }
+                for (rc.outer.inner: int32, 0, 16) {
+                  for (ff.outer.inner: int32, 0, 2) {
+                    let cse_var_10: int32 = (ff.outer.inner*7)
+                    let cse_var_9: int32 = (cse_var_10 + 6)
+                    let cse_var_8: int32 = (cse_var_10 + 5)
+                    let cse_var_7: int32 = (cse_var_10 + 4)
+                    let cse_var_6: int32 = (cse_var_10 + 3)
+                    let cse_var_5: int32 = (cse_var_10 + 2)
+                    let cse_var_4: int32 = (cse_var_10 + 1)
+                     {
+                      conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                      conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 49)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                      conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 50)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                      conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 51)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                      conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 52)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                      conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 53)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                      conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 54)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                      conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 55)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                    }
+                  }
                 }
               }
             }
           }
         }
-        for (i1.inner: int32, 0, 16) {
-          compute[(((blockIdx.x*784) + (i1.inner*49)) + threadIdx.x)] = max((conv2d_nchw_1[i1.inner] + bias[((blockIdx.x*16) + i1.inner)]), 0f32)
+        for (i1.inner: int32, 0, 2) {
+          for (i3.inner: int32, 0, 7) {
+            compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+          }
         }
       }
     }
@@ -1230,7 +361,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.229 ms
+    Execution time of this operator: 0.330 ms
 
 
 
@@ -1274,21 +405,21 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-    conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=4)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=1)
+    conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
     conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
     conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
-    conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+    conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
     conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=16)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
-    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
+    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
     conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
     conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
@@ -1296,14 +427,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=16)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=1)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
     compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
     compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1323,14 +454,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 1024)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 16)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -1348,10 +479,10 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(49) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[16];
-      __shared__ float pad_temp_shared[2016];
-      __shared__ float kernel_shared[1536];
+    extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[14];
+      __shared__ float pad_temp_shared[1568];
+      __shared__ float kernel_shared[512];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
@@ -1366,864 +497,51 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       conv2d_nchw[11] = 0.000000e+00f;
       conv2d_nchw[12] = 0.000000e+00f;
       conv2d_nchw[13] = 0.000000e+00f;
-      conv2d_nchw[14] = 0.000000e+00f;
-      conv2d_nchw[15] = 0.000000e+00f;
       for (int rc_outer_outer = 0; rc_outer_outer < 16; ++rc_outer_outer) {
-        for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
-          __syncthreads();
-          pad_temp_shared[((int)threadIdx.x)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 49)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 49) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 98) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 147)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 147) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 196) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 245)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 245) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 294) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 343)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 343) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 392)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 392) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 441)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 335)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 490) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 539)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 539) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 588) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 637)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 637) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 686) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 735)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 735) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 784) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 833)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 833) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 882)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 678)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 931)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 931) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 980) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1029)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1029) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1078) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1127)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1127) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1176) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1225)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1225) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1274)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1274) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1323)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1021)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1372) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1421)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1421) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1470) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1519)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1519) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1568) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1617)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1617) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1666) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1715)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1715) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1764)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1364)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1813)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1813) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1862) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1911)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1911) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1960) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-          if (((int)threadIdx.x) < 7) {
-            pad_temp_shared[(((int)threadIdx.x) + 2009)] = 0.000000e+00f;
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 49)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 49) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 49) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 98)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 98) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 2) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 147)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 147) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 51) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 196)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 196) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 4) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 245)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 245) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 53) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 294)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 294) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 6) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 343)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 343) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 55) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 392)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 8) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 441)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 441) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 57) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 490)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 490) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 10) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 539)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 539) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 59) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 588)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 588) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 12) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 637)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 637) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 61) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 686)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 686) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 14) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 735)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 735) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 63) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 784)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 16) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 833)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 833) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 65) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 882)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 882) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 18) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 931)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 931) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 67) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 980)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 980) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 20) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1029)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1029) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 69) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1078)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1078) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 22) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1127)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1127) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 71) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1176) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 24) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1225)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1225) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 73) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1274)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1274) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 26) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1323)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1323) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 75) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1372)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1372) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 28) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1421)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1421) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 77) % 96) * 3)) + rx_outer_outer)];
-          kernel_shared[(((int)threadIdx.x) + 1470)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1470) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 30) * 3)) + rx_outer_outer)];
-          if (((int)threadIdx.x) < 17) {
-            kernel_shared[(((int)threadIdx.x) + 1519)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1519) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 79) * 3)) + rx_outer_outer)];
-          }
-          __syncthreads();
-          for (int rc_outer_inner = 0; rc_outer_inner < 2; ++rc_outer_inner) {
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[(rc_outer_inner * 48)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 96)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 192)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 288)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 97)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 193)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 289)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 2)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 98)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 194)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 290)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 3)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 99)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 195)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 291)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 4)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 100)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 196)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 292)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 5)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 101)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 197)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 293)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 6)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 102)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 198)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 294)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 7)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 103)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 199)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 295)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 8)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 104)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 200)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 296)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 9)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 105)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 201)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 297)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 10)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 106)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 202)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 298)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 11)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 107)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 203)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 299)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 12)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 108)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 204)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 300)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 13)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 109)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 205)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 301)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 14)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 110)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 206)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 302)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 15)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 111)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 207)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 303)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 16)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 112)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 208)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 304)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 17)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 113)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 209)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 305)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 18)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 114)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 210)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 306)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 19)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 115)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 211)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 307)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 20)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 116)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 212)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 308)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 21)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 117)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 213)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 309)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 22)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 118)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 214)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 310)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 23)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 119)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 215)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 311)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 24)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 120)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 216)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 312)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 25)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 121)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 217)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 313)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 26)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 122)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 218)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 314)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 27)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 123)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 219)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 315)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 28)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 124)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 220)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 316)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 29)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 125)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 221)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 317)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 30)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 126)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 222)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 318)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 31)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 127)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 223)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 319)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 32)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 128)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 224)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 320)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 33)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 129)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 225)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 321)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 34)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 130)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 226)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 322)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 35)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 131)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 227)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 323)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 36)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 132)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 228)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 324)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 37)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 133)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 229)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 325)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 38)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 134)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 230)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 326)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 39)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 135)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 231)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 327)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 40)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 136)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 232)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 328)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 41)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 137)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 233)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 329)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 42)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 138)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 234)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 330)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 43)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 139)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 235)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 331)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 44)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 140)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 236)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 332)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 45)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 141)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 237)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 333)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 46)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 142)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 238)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 334)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 47)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 143)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 239)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 335)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 384)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 480)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 576)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 672)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 385)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 481)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 577)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 673)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 386)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 482)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 578)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 674)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 387)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 483)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 579)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 675)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 388)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 484)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 580)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 676)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 389)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 485)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 581)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 677)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 390)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 486)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 582)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 678)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 391)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 487)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 583)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 679)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 392)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 488)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 584)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 680)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 393)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 489)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 585)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 681)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 394)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 490)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 586)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 682)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 395)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 491)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 587)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 683)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 396)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 492)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 588)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 684)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 397)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 493)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 589)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 685)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 398)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 494)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 590)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 686)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 399)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 495)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 591)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 687)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 400)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 496)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 592)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 688)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 401)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 497)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 593)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 689)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 402)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 498)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 594)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 690)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 403)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 499)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 595)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 691)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 404)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 500)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 596)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 692)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 405)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 501)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 597)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 693)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 406)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 502)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 598)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 694)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 407)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 503)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 599)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 695)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 408)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 504)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 600)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 696)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 409)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 505)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 601)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 697)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 410)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 506)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 602)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 698)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 411)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 507)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 603)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 699)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 412)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 508)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 604)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 700)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 413)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 509)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 605)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 701)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 414)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 510)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 606)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 702)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 415)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 511)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 607)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 703)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 416)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 512)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 608)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 704)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 417)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 513)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 609)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 705)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 418)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 514)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 610)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 706)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 419)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 515)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 611)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 707)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 420)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 516)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 612)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 708)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 421)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 517)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 613)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 709)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 422)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 518)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 614)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 710)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 423)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 519)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 615)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 711)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 424)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 520)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 616)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 712)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 425)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 521)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 617)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 713)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 426)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 522)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 618)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 714)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 427)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 523)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 619)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 715)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 428)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 524)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 620)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 716)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 429)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 525)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 621)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 717)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 430)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 526)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 622)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 718)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 431)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 527)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 623)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 719)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 768)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 864)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 960)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1056)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 769)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 865)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 961)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1057)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 770)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 866)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 962)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1058)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 771)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 867)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 963)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1059)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 772)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 868)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 964)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1060)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 773)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 869)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 965)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1061)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 774)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 870)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 966)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1062)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 775)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 871)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 967)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1063)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 776)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 872)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 968)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1064)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 777)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 873)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 969)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1065)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 778)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 874)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 970)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1066)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 779)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 875)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 971)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1067)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 780)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 876)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 972)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1068)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 781)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 877)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 973)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1069)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 782)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 878)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 974)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1070)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 783)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 879)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 975)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1071)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 784)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 880)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 976)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1072)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 785)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 881)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 977)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1073)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 786)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 882)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 978)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1074)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 787)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 883)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 979)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1075)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 788)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 884)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 980)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1076)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 789)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 885)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 981)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1077)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 790)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 886)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 982)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1078)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 791)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 887)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 983)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1079)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 792)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 888)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 984)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1080)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 793)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 889)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 985)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1081)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 794)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 890)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 986)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1082)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 795)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 891)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 987)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1083)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 796)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 892)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 988)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1084)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 797)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 893)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 989)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1085)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 798)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 894)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 990)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1086)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 799)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 895)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 991)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1087)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 800)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 896)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 992)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1088)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 801)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 897)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 993)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1089)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 802)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 898)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 994)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1090)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 803)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 899)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 995)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1091)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 804)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 900)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 996)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1092)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 805)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 901)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 997)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1093)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 806)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 902)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 998)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1094)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 807)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 903)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 999)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1095)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 808)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 904)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1000)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1096)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 809)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 905)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1001)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1097)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 810)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 906)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1002)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1098)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 811)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 907)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1003)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1099)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 812)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 908)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1004)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1100)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 813)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 909)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1005)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1101)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 814)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 910)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1006)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1102)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 815)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 911)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1007)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1103)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1152)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1248)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1344)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1440)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1153)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1249)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1345)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1441)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1154)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1250)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1346)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1442)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1155)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1251)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1347)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1443)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1156)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1252)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1348)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1444)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1157)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1253)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1349)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1445)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1158)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1254)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1350)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1446)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1159)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1255)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1351)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1447)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1160)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1256)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1352)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1448)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1161)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1257)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1353)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1449)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1162)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1258)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1354)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1450)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1163)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1259)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1355)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1451)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1164)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1260)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1356)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1452)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1165)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1261)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1357)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1453)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1166)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1262)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1358)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1454)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1167)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1263)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1359)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1455)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1168)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1264)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1360)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1456)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1169)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1265)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1361)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1457)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1170)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1266)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1362)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1458)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1171)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1267)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1363)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1459)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1172)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1268)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1364)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1460)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1173)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1269)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1365)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1461)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1174)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1270)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1366)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1462)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1175)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1271)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1367)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1463)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1176)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1272)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1368)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1464)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1177)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1273)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1369)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1465)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1178)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1274)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1370)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1466)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1179)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1275)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1371)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1467)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1180)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1276)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1372)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1468)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1181)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1277)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1373)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1469)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1182)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1278)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1374)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1470)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1183)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1279)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1375)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1471)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1184)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1280)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1376)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1472)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1185)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1281)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1377)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1473)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1186)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1282)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1378)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1474)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1187)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1283)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1379)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1475)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1188)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1284)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1380)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1476)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1189)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1285)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1381)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1477)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1190)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1286)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1382)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1478)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1191)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1287)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1383)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1479)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1192)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1288)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1384)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1480)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1193)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1289)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1385)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1481)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1194)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1290)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1386)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1482)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1195)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1291)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1387)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1483)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1196)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1292)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1388)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1484)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1197)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1293)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1389)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1485)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1198)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1294)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1390)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1486)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1199)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1295)]));
-            conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1391)]));
-            conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1487)]));
+        for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
+          for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
+            __syncthreads();
+            for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer < 28; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) {
+              pad_temp_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 56) + ((int)threadIdx.x))] = (((((1 <= (ry_outer_outer + (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 8) + (((int)threadIdx.x) / 7)) % 7))) && ((ry_outer_outer + (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 8) + (((int)threadIdx.x) / 7)) % 7)) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (ax [...]
+            }
+            kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) >> 5) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 56)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 56) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 24) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 112) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 168)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 168) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 224)] = kernel[(((((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) >> 5) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer) + 32256)];
+            kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 280) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 24) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 336) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+            kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) >> 5) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer) + 64512)];
+            if (((int)threadIdx.x) < 8) {
+              kernel_shared[(((int)threadIdx.x) + 504)] = kernel[((((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 24) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer) + 69120)];
+            }
+            __syncthreads();
+            for (int rc_outer_inner = 0; rc_outer_inner < 16; ++rc_outer_inner) {
+              for (int ff_outer_inner = 0; ff_outer_inner < 2; ++ff_outer_inner) {
+                conv2d_nchw[(ff_outer_inner * 7)] = (conv2d_nchw[(ff_outer_inner * 7)] + (pad_temp_shared[((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7))] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[((ff_outer_inner * 7) + 1)] = (conv2d_nchw[((ff_outer_inner * 7) + 1)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[((ff_outer_inner * 7) + 2)] = (conv2d_nchw[((ff_outer_inner * 7) + 2)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[((ff_outer_inner * 7) + 3)] = (conv2d_nchw[((ff_outer_inner * 7) + 3)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[((ff_outer_inner * 7) + 4)] = (conv2d_nchw[((ff_outer_inner * 7) + 4)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[((ff_outer_inner * 7) + 5)] = (conv2d_nchw[((ff_outer_inner * 7) + 5)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[((ff_outer_inner * 7) + 6)] = (conv2d_nchw[((ff_outer_inner * 7) + 6)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+                conv2d_nchw[(ff_outer_inner * 7)] = (conv2d_nchw[(ff_outer_inner * 7)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 49)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+                conv2d_nchw[((ff_outer_inner * 7) + 1)] = (conv2d_nchw[((ff_outer_inner * 7) + 1)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 50)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+                conv2d_nchw[((ff_outer_inner * 7) + 2)] = (conv2d_nchw[((ff_outer_inner * 7) + 2)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 51)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+                conv2d_nchw[((ff_outer_inner * 7) + 3)] = (conv2d_nchw[((ff_outer_inner * 7) + 3)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 52)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+                conv2d_nchw[((ff_outer_inner * 7) + 4)] = (conv2d_nchw[((ff_outer_inner * 7) + 4)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 53)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+                conv2d_nchw[((ff_outer_inner * 7) + 5)] = (conv2d_nchw[((ff_outer_inner * 7) + 5)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 54)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+                conv2d_nchw[((ff_outer_inner * 7) + 6)] = (conv2d_nchw[((ff_outer_inner * 7) + 6)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 55)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+              }
+            }
           }
         }
       }
-      for (int i1_inner = 0; i1_inner < 16; ++i1_inner) {
-        compute[(((((int)blockIdx.x) * 784) + (i1_inner * 49)) + ((int)threadIdx.x))] = max((conv2d_nchw[i1_inner] + bias[((((int)blockIdx.x) * 16) + i1_inner)]), 0.000000e+00f);
+      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
+        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
+          compute[(((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 98)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 16) + ((((int)threadIdx.x) / 7) * 2)) + i1_inner)]), 0.000000e+00f);
+        }
       }
     }
 
@@ -2282,7 +600,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  28.084 seconds)
+   **Total running time of the script:** ( 2 minutes  19.891 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index 144ebb904..a3e6315d4 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -614,7 +614,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       9.6952       9.7011       9.7414       9.6430       0.0404   
+       9.7473       9.7540       9.7861       9.7017       0.0348   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index 970decefb..7a6804ca8 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -633,7 +633,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      771.7962     773.1405     776.8791     765.3691      4.7941   
+      746.9453     746.1865     751.6476     743.0019      3.5701   
                
 
 
@@ -658,7 +658,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  18.796 seconds)
+   **Total running time of the script:** ( 1 minutes  17.690 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index cdb878c29..66b904b6c 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -362,119 +362,75 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_16: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_9: placeholder_18: Buffer(placeholder_14, float32, [128, 512], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
-      for (i0.outer.i1.outer.fused: int32, 0, 256) "parallel" {
-        allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
-          for (i.outer.inner: int32, 0, 8) {
-            let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32)
-            let cse_var_1: int32 = (i.outer.inner*32)
-             {
-              compute_5: Buffer(compute_4, float32, [256], [])[cse_var_1] = 0f32
-              compute_5[(cse_var_1 + 1)] = 0f32
-              compute_5[(cse_var_1 + 2)] = 0f32
-              compute_5[(cse_var_1 + 3)] = 0f32
-              compute_5[(cse_var_1 + 4)] = 0f32
-              compute_5[(cse_var_1 + 5)] = 0f32
-              compute_5[(cse_var_1 + 6)] = 0f32
-              compute_5[(cse_var_1 + 7)] = 0f32
-              compute_5[(cse_var_1 + 8)] = 0f32
-              compute_5[(cse_var_1 + 9)] = 0f32
-              compute_5[(cse_var_1 + 10)] = 0f32
-              compute_5[(cse_var_1 + 11)] = 0f32
-              compute_5[(cse_var_1 + 12)] = 0f32
-              compute_5[(cse_var_1 + 13)] = 0f32
-              compute_5[(cse_var_1 + 14)] = 0f32
-              compute_5[(cse_var_1 + 15)] = 0f32
-              compute_5[(cse_var_1 + 16)] = 0f32
-              compute_5[(cse_var_1 + 17)] = 0f32
-              compute_5[(cse_var_1 + 18)] = 0f32
-              compute_5[(cse_var_1 + 19)] = 0f32
-              compute_5[(cse_var_1 + 20)] = 0f32
-              compute_5[(cse_var_1 + 21)] = 0f32
-              compute_5[(cse_var_1 + 22)] = 0f32
-              compute_5[(cse_var_1 + 23)] = 0f32
-              compute_5[(cse_var_1 + 24)] = 0f32
-              compute_5[(cse_var_1 + 25)] = 0f32
-              compute_5[(cse_var_1 + 26)] = 0f32
-              compute_5[(cse_var_1 + 27)] = 0f32
-              compute_5[(cse_var_1 + 28)] = 0f32
-              compute_5[(cse_var_1 + 29)] = 0f32
-              compute_5[(cse_var_1 + 30)] = 0f32
-              compute_5[(cse_var_1 + 31)] = 0f32
-              for (elem_idx: int32, 0, (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-                let cse_var_35: int32 = (cse_var_1 + 1)
-                let cse_var_34: int32 = (cse_var_1 + 10)
-                let cse_var_33: int32 = (cse_var_1 + 11)
-                let cse_var_32: int32 = (cse_var_1 + 12)
-                let cse_var_31: int32 = (cse_var_1 + 13)
-                let cse_var_30: int32 = (cse_var_1 + 14)
-                let cse_var_29: int32 = (cse_var_1 + 15)
-                let cse_var_28: int32 = (cse_var_1 + 16)
-                let cse_var_27: int32 = (cse_var_1 + 17)
-                let cse_var_26: int32 = (cse_var_1 + 18)
-                let cse_var_25: int32 = (cse_var_1 + 19)
-                let cse_var_24: int32 = (cse_var_1 + 2)
-                let cse_var_23: int32 = (cse_var_1 + 20)
-                let cse_var_22: int32 = (cse_var_1 + 21)
-                let cse_var_21: int32 = (cse_var_1 + 22)
-                let cse_var_20: int32 = (cse_var_1 + 24)
+      preflattened_buffer_map = {placeholder_6: placeholder_15: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_17: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_18: Buffer(placeholder_13, int32, [33], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
+      for (i0.outer.i1.outer.fused: int32, 0, 32) "parallel" {
+        allocate(compute_4: Pointer(global float32), float32, [2048]), storage_scope = global {
+          for (i.outer.inner: int32, 0, 2) {
+            for (i.inner.init: int32, 0, 64) {
+              let cse_var_1: int32 = ((i.outer.inner*1024) + (i.inner.init*16))
+               {
+                compute_5: Buffer(compute_4, float32, [2048], [])[cse_var_1] = 0f32
+                compute_5[(cse_var_1 + 1)] = 0f32
+                compute_5[(cse_var_1 + 2)] = 0f32
+                compute_5[(cse_var_1 + 3)] = 0f32
+                compute_5[(cse_var_1 + 4)] = 0f32
+                compute_5[(cse_var_1 + 5)] = 0f32
+                compute_5[(cse_var_1 + 6)] = 0f32
+                compute_5[(cse_var_1 + 7)] = 0f32
+                compute_5[(cse_var_1 + 8)] = 0f32
+                compute_5[(cse_var_1 + 9)] = 0f32
+                compute_5[(cse_var_1 + 10)] = 0f32
+                compute_5[(cse_var_1 + 11)] = 0f32
+                compute_5[(cse_var_1 + 12)] = 0f32
+                compute_5[(cse_var_1 + 13)] = 0f32
+                compute_5[(cse_var_1 + 14)] = 0f32
+                compute_5[(cse_var_1 + 15)] = 0f32
+              }
+            }
+            for (elem_idx: int32, 0, (placeholder_3[(i0.outer.i1.outer.fused + 1)] - placeholder_3[i0.outer.i1.outer.fused])) {
+              for (i.inner: int32, 0, 64) {
                 let cse_var_19: int32 = (elem_idx*16)
-                let cse_var_18: int32 = (cse_var_1 + 9)
-                let cse_var_17: int32 = (cse_var_1 + 8)
-                let cse_var_16: int32 = (cse_var_1 + 7)
-                let cse_var_15: int32 = (cse_var_1 + 6)
-                let cse_var_14: int32 = (cse_var_1 + 5)
-                let cse_var_13: int32 = (cse_var_1 + 4)
-                let cse_var_12: int32 = (cse_var_1 + 23)
-                let cse_var_11: int32 = (cse_var_1 + 30)
-                let cse_var_10: int32 = (cse_var_1 + 3)
-                let cse_var_9: int32 = (cse_var_1 + 29)
-                let cse_var_8: int32 = (cse_var_1 + 28)
-                let cse_var_7: int32 = (cse_var_1 + 27)
-                let cse_var_6: int32 = (cse_var_1 + 26)
-                let cse_var_5: int32 = (cse_var_1 + 25)
-                let cse_var_4: int32 = (cse_var_1 + 31)
-                let cse_var_3: int32 = ((floordiv(i0.outer.i1.outer.fused, 32)*4096) + (i.outer.inner*512))
+                let cse_var_18: int32 = ((i.outer.inner*16384) + (i.inner*256))
+                let cse_var_17: int32 = ((i.outer.inner*1024) + (i.inner*16))
+                let cse_var_16: int32 = (cse_var_17 + 1)
+                let cse_var_15: int32 = (cse_var_17 + 11)
+                let cse_var_14: int32 = (cse_var_17 + 12)
+                let cse_var_13: int32 = (cse_var_17 + 13)
+                let cse_var_12: int32 = (cse_var_17 + 14)
+                let cse_var_11: int32 = (cse_var_17 + 15)
+                let cse_var_10: int32 = (cse_var_17 + 2)
+                let cse_var_9: int32 = (cse_var_17 + 3)
+                let cse_var_8: int32 = (cse_var_17 + 4)
+                let cse_var_7: int32 = (cse_var_17 + 5)
+                let cse_var_6: int32 = (cse_var_17 + 6)
+                let cse_var_5: int32 = (cse_var_17 + 7)
+                let cse_var_4: int32 = (cse_var_17 + 8)
+                let cse_var_3: int32 = (cse_var_17 + 9)
+                let cse_var_2: int32 = (cse_var_17 + 10)
                  {
-                  compute_5[cse_var_1] = (compute_5[cse_var_1] + (placeholder_1[((placeholder_3[cse_var_2]*16) + cse_var_19)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_35] = (compute_5[cse_var_35] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 1)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_24] = (compute_5[cse_var_24] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 2)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 3)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 4)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 5)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 6)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 7)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 8)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 9)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_34] = (compute_5[cse_var_34] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 10)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_33] = (compute_5[cse_var_33] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 11)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_32] = (compute_5[cse_var_32] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 12)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_31] = (compute_5[cse_var_31] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 13)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_30] = (compute_5[cse_var_30] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 14)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_29] = (compute_5[cse_var_29] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 15)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_28] = (compute_5[cse_var_28] + (placeholder_1[((placeholder_3[cse_var_2]*16) + cse_var_19)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_27] = (compute_5[cse_var_27] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 1)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_26] = (compute_5[cse_var_26] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 2)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_25] = (compute_5[cse_var_25] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 3)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_23] = (compute_5[cse_var_23] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 4)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_22] = (compute_5[cse_var_22] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 5)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_21] = (compute_5[cse_var_21] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 6)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 7)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 8)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 9)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 10)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 11)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 12)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 13)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 14)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
-                  compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 15)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
+                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_2] = (compute_5[cse_var_2] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 16) {
-            let cse_var_36: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
-            compute[ramp(cse_var_36, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_36, 1, 16)]), broadcast(0f32, 16))
+          for (i0.inner: int32, 0, 128) {
+            let cse_var_20: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*16))
+            compute[ramp(cse_var_20, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_20, 1, 16)]), broadcast(0f32, 16))
           }
         }
       }
@@ -528,7 +484,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 3.549 ms
+    Execution time of this operator: 1.827 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index 3f2f54888..333dc9e5c 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:44.106** total execution time for **how_to_tune_with_autotvm** files:
+**00:44.100** total execution time for **how_to_tune_with_autotvm** files:
 
-- **00:43.278**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
-- **00:00.215**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
-- **00:00.207**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
-- **00:00.204**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
-- **00:00.203**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:43.294**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
+- **00:00.208**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
+- **00:00.208**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
+- **00:00.195**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
+- **00:00.194**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 63cf95880..fd5b0177f 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -859,8 +859,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
-    No: 6   GFLOPS: 42.30/42.30     result: MeasureResult(costs=(0.005473275105263158,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.56661057472229, timestamp=1652754069.3949578) [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
-    No: 7   GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 6   GFLOPS: 93.19/93.19     result: MeasureResult(costs=(0.0024842372291666664,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7560157775878906, timestamp=1652782145.6015983)      [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+    No: 7   GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -983,7 +983,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
-    No: 8   GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 8   GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1106,7 +1106,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
-    No: 9   GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 9   GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1229,7 +1229,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
-    No: 10  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 10  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1247,7 +1247,7 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
-    No: 11  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 11  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1370,7 +1370,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
-    No: 12  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 12  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1493,7 +1493,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
-    No: 13  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1616,7 +1616,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
-    No: 14  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1739,7 +1739,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
-    No: 15  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 15  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1862,7 +1862,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
-    No: 16  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 16  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1985,7 +1985,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
-    No: 17  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 17  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -2108,7 +2108,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
-    No: 18  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 18  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -2231,7 +2231,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
-    No: 19  GFLOPS: 0.00/42.30      result: Traceback (most recent call last):
+    No: 19  GFLOPS: 0.00/93.19      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 721, in __call__
         yield remote, remote.load_module(os.path.split(build_result.filename)[1])
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 685, in run_through_rpc
@@ -2319,7 +2319,7 @@ for this template
       15: _PyEval_EvalFrameDefault
       14: 0x0000000000537c30
       13: _PyObject_FastCallKeywords
-      12: 0x00007f3d1d202fa2
+      12: 0x00007f5b9a242fa2
       11: _ctypes_callproc
       10: ffi_call
       9: ffi_call_unix64
@@ -2384,7 +2384,7 @@ for this template
       21: _PyFunction_FastCallKeywords
       20: _PyEval_EvalFrameDefault
       19: _PyFunction_FastCall      [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
-    No: 20  GFLOPS: 143.71/143.71   result: MeasureResult(costs=(0.00161091818,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4025070667266846, timestamp=1652754095.8003697)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+    No: 20  GFLOPS: 144.66/144.66   result: MeasureResult(costs=(0.00160031061,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3949148654937744, timestamp=1652782171.9501283)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
 
 
 
@@ -2437,7 +2437,7 @@ and measure running time.
 
     Best config:
     [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
-    Time cost of this operator: 0.002074
+    Time cost of this operator: 0.001957
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index d19cd1ccb..34a165576 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -292,10 +292,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.3     98.755   (1, 2, 10, 10, 3)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.0       0.958    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.901     0.288    (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             313.201   -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  310.7     98.722   (1, 2, 10, 10, 3)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.121     0.992    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.901     0.286    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             314.722   -        -                  -       -        
 
 
 
@@ -357,10 +357,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  197.2     98.6     (1, 1, 10, 10, 6)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.9       0.95     (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.9       0.45     (1, 3, 10, 10, 1)  1       1        
-    Total_time                                    -                                             200.0     -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  118.5     97.805   (1, 6, 10, 10, 1)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.738     1.434    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.922     0.761    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             121.16    -        -                  -       -        
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index a9aad1ddf..97e84f92f 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:45.444** total execution time for **how_to_work_with_microtvm** files:
+**00:45.243** total execution time for **how_to_work_with_microtvm** files:
 
-- **00:41.316**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)
-- **00:03.538**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)
-- **00:00.199**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)
-- **00:00.196**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``)
-- **00:00.195**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tvmc.py` (``micro_tvmc.py``)
+- **00:41.099**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)
+- **00:03.547**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)
+- **00:00.237**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)
+- **00:00.182**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tvmc.py` (``micro_tvmc.py``)
+- **00:00.178**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``)
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 98cd4f2d1..667485ad0 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:08.814** total execution time for **how_to_work_with_relay** files:
+**00:08.838** total execution time for **how_to_work_with_relay** files:
 
-- **00:06.941**: :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)
-- **00:01.660**: :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)
-- **00:00.213**: :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)
+- **00:06.807**: :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)
+- **00:01.833**: :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)
+- **00:00.199**: :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index d78916e07..93d141838 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**00:05.609** total execution time for **how_to_work_with_schedules** files:
+**00:05.499** total execution time for **how_to_work_with_schedules** files:
 
-- **00:02.071**: :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)
-- **00:01.126**: :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)
-- **00:00.719**: :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)
-- **00:00.718**: :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)
-- **00:00.296**: :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)
-- **00:00.239**: :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``)
-- **00:00.225**: :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)
-- **00:00.214**: :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)
+- **00:02.037**: :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)
+- **00:01.136**: :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)
+- **00:00.703**: :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)
+- **00:00.687**: :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)
+- **00:00.288**: :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)
+- **00:00.231**: :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``)
+- **00:00.216**: :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)
+- **00:00.202**: :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 090ecdc47..a56b2ef7c 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -318,7 +318,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpl1ncbzhq/input0.cc'\nsource_filename = \"/tmp/tmpl1ncbzhq/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpv01nlxvs/input0.cc'\nsource_filename = \"/tmp/tmpv01nlxvs/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 1ba0a79e0..61d434186 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:19.778** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:20.165** total execution time for **topic_vta_tutorials_autotvm** files:
 
-- **00:19.592**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
-- **00:00.186**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)
+- **00:19.983**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
+- **00:00.183**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 25b5a0998..7144b0f98 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -265,7 +265,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 20.80s!
+    resnet18_v1 inference graph built in 20.70s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index b864140ce..65d9cfb09 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -301,7 +301,7 @@ The compilation steps are:
 
     /workspace/python/tvm/relay/build_module.py:431: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 14.53s!
+    yolov3-tiny inference graph built in 14.58s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index 257846bfa..c3c196b45 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**01:27.391** total execution time for **topic_vta_tutorials_frontend** files:
+**01:27.320** total execution time for **topic_vta_tutorials_frontend** files:
 
-- **00:46.427**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
-- **00:40.964**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
+- **00:46.491**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
+- **00:40.829**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 25b6f1945..80c6cd410 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:03.534** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.460** total execution time for **topic_vta_tutorials_optimize** files:
 
-- **00:02.997**: :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
-- **00:00.537**: :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
+- **00:02.934**: :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
+- **00:00.526**: :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index 4d7578589..21d8aad3c 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:00.954** total execution time for **topic_vta_tutorials** files:
+**00:00.977** total execution time for **topic_vta_tutorials** files:
 
-- **00:00.482**: :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
-- **00:00.472**: :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.493**: :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
+- **00:00.484**: :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 69bad7ac9..f70e34f58 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -184,7 +184,7 @@ trials, we can load the best schedule from the log file and apply it.
 
  .. code-block:: none
 
-
+    *E
 
 
 
@@ -306,7 +306,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 94.705 ms
+    Execution time of this operator: 93.749 ms
 
 
 
@@ -415,6 +415,11 @@ Expression (TE) language that demonstrates how TVM can optimize computational
 operations.
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 1 minutes  11.014 seconds)
+
+
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 80a05dc55..8a4e18ec9 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -271,7 +271,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 492.0485737900015, 'median': 491.6691768000021, 'std': 0.8640093566493087}
+    {'mean': 490.8931876000008, 'median': 490.8230106499957, 'std': 0.5127112764728715}
 
 
 
@@ -485,31 +485,31 @@ the tuning data to.
 
  .. code-block:: none
 
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.55/  17.55 GFLOPS | Progress: (4/20) | 5.86 s
    [Task  1/25]  Current/Best:    6.17/  17.55 GFLOPS | Progress: (8/20) | 8.79 s
    [Task  1/25]  Current/Best:   11.56/  22.87 GFLOPS | Progress: (12/20) | 11.23 s
    [Task  1/25]  Current/Best:   16.80/  22.87 GFLOPS | Progress: (16/20) | 12.90 s
    [Task  1/25]  Current/Best:   11.64/  23.94 GFLOPS | Progress: (20/20) | 14.61 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.16/  13.14 GFLOPS | Progress: (4/20) | 3.78 s
    [Task  2/25]  Current/Best:   13.94/  18.32 GFLOPS | Progress: (8/20) | 5.05 s
    [Task  2/25]  Current/Best:   21.19/  21.19 GFLOPS | Progress: (12/20) | 6.34 s
    [Task  2/25]  Current/Best:   12.37/  21.19 GFLOPS | Progress: (16/20) | 7.59 s
    [Task  2/25]  Current/Best:   19.47/  21.19 GFLOPS | Progress: (20/20) | 9.19 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.54 GFLOPS | Progress: (4/20) | 5.76 s
    [Task  3/25]  Current/Best:   15.60/  16.89 GFLOPS | Progress: (8/20) | 7.65 s
    [Task  3/25]  Current/Best:   14.91/  16.89 GFLOPS | Progress: (12/20) | 9.34 s
    [Task  3/25]  Current/Best:    7.17/  23.80 GFLOPS | Progress: (16/20) | 11.22 s
    [Task  3/25]  Current/Best:   12.70/  23.80 GFLOPS | Progress: (20/20) | 15.73 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.55/  20.43 GFLOPS | Progress: (4/20) | 2.27 s
    [Task  4/25]  Current/Best:    6.51/  20.43 GFLOPS | Progress: (8/20) | 7.02 s
    [Task  4/25]  Current/Best:   22.42/  22.42 GFLOPS | Progress: (12/20) | 11.87 s
    [Task  4/25]  Current/Best:   16.79/  22.42 GFLOPS | Progress: (16/20) | 14.27 s
    [Task  4/25]  Current/Best:   13.38/  22.42 GFLOPS | Progress: (20/20) | 16.36 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.77/  10.46 GFLOPS | Progress: (4/20) | 2.48 s
    [Task  5/25]  Current/Best:   11.72/  12.79 GFLOPS | Progress: (8/20) | 4.52 s
    [Task  5/25]  Current/Best:   11.79/  18.08 GFLOPS | Progress: (12/20) | 7.71 s
    [Task  5/25]  Current/Best:   11.88/  22.89 GFLOPS | Progress: (16/20) | 9.17 s
    [Task  5/25]  Current/Best:   12.02/  22.89 GFLOPS | Progress: (20/20) | 11.03 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.20/  20.81 GFLOPS | Progress: (4/20) | 4.02 s
    [Task  6/25]  Current/Best:   19.02/  20.81 GFLOPS | Progress: (8/20) | 5.78 s
    [Task  6/25]  Current/Best:   13.33/  20.81 GFLOPS | Progress: (12/20) | 7.71 s
    [Task  6/25]  Current/Best:   19.91/  20.81 GFLOPS | Progress: (16/20) | 9.94 s
    [Task  6/25]  Current/Best:    3.73/  20.81 GFLOPS | Progress: (20/20) | 12.44 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.23/  12.22 GFLOPS | Progress: (4/20) | 3.51 s
    [Task  7/25]  Current/Best:   20.27/  21.02 GFLOPS | Progress: (8/20) | 5.00 s
    [Task  7/25]  Current/Best:   15.60/  21.02 GFLOPS | Progress: (12/20) | 6.90 s
    [Task  7/25]  Current/Best:   12.29/  21.02 GFLOPS | Progress: (16/20) | 8.93 s
    [Task  7/25]  Current/Best:    6.38/  21.77 GFLOPS | Progress: (20/20) | 11.36 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.00/  13.99 GFLOPS | Progress: (4/20) | 2.83 s
    [Task  8/25]  Current/Best:    9.53/  13.99 GFLOPS | Progress: (8/20) | 7.90 s
    [Task  8/25]  Current/Best:   12.47/  13.99 GFLOPS | Progress: (12/20) | 14.31 s
    [Task  8/25]  Current/Best:   18.79/  18.79 GFLOPS | Progress: (16/20) | 16.41 s
    [Task  8/25]  Current/Best:   19.48/  19.48 GFLOPS | Progress: (20/20) | 23.48 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.40/  15.45 GFLOPS | Progress: (4/20) | 18.81 s
    [Task  9/25]  Current/Best:   23.49/  23.49 GFLOPS | Progress: (8/20) | 20.52 s
    [Task  9/25]  Current/Best:    8.25/  23.49 GFLOPS | Progress: (12/20) | 23.06 s
    [Task  9/25]  Current/Best:   18.02/  23.49 GFLOPS | Progress: (16/20) | 25.93 s
    [Task  9/25]  Current/Best:    9.13/  23.49 GFLOPS | Progress: (20/20) | 34.58 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.09/  18.09 GFLOPS | Progress: (4/20) | 2.45 s
    [Task 10/25]  Current/Best:   15.56/  18.09 GFLOPS | Progress: (8/20) | 4.06 s
    [Task 10/25]  Current/Best:   12.19/  18.92 GFLOPS | Progress: (12/20) | 5.59 s
    [Task 10/25]  Current/Best:   19.14/  20.46 GFLOPS | Progress: (16/20) | 6.67 s
    [Task 10/25]  Current/Best:    8.69/  20.46 GFLOPS | Progress: (20/20
 ) | 8.19 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.33/  18.08 GFLOPS | Progress: (4/20) | 3.19 s
    [Task 11/25]  Current/Best:   17.11/  18.08 GFLOPS | Progress: (8/20) | 6.01 s
    [Task 11/25]  Current/Best:   18.19/  18.19 GFLOPS | Progress: (12/20) | 8.03 s
    [Task 11/25]  Current/Best:   11.86/  21.22 GFLOPS | Progress: (16/20) | 10.99 s
    [Task 11/25]  Current/Best:   19.48/  21.54 GFLOPS | Progress: (20/20) | 13.08 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.83/  17.94 GFLOPS | Progress: (4/20) | 5.70 s
    [Task 12/25]  Current/Best:    5.26/  17.94 GFLOPS | Progress: (8/20) | 9.60 s
    [Task 12/25]  Current/Best:   18.87/  18.93 GFLOPS | Progress: (12/20) | 11.58 s
    [Task 12/25]  Current/Best:   15.47/  18.93 GFLOPS | Progress: (16/20) | 14.52 s
    [Task 12/25]  Current/Best:   15.16/  18.93 GFLOPS | Progress: (20/20) | 16.42 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.64/  17.28 GFLOPS | Progress: (4/20) | 3.64 s
    [Task 13/25]  Current/Best:   15.94/  21.11 GFLOPS | Progress: (8/20) | 6.26 s
    [Task 13/25]  Current/Best:   19.57/  21.67 GFLOPS | Progress: (12/20) | 9.21 s
    [Task 13/25]  Current/Best:   12.28/  21.67 GFLOPS | Progress: (16/20) | 12.63 s
    [Task 13/25]  Current/Best:   17.57/  21.67 GFLOPS | Progress: (20/20) | 14.99 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.62/  13.62 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 14/25]  Current/Best:    5.94/  13.62 GFLOPS | Progress: (8/20) | 5.57 s
    [Task 14/25]  Current/Best:   18.74/  19.07 GFLOPS | Progress: (12/20) | 8.24 s
    [Task 14/25]  Current/Best:   16.41/  19.07 GFLOPS | Progress: (16/20) | 10.15 s
    [Task 14/25]  Current/Best:   17.38/  19.07 GFLOPS | Progress: (20/20) | 11.91 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.57/  17.57 GFLOPS | Progress: (4/20) | 5.89 s
    [Task  1/25]  Current/Best:    6.17/  17.57 GFLOPS | Progress: (8/20) | 8.79 s
    [Task  1/25]  Current/Best:   11.53/  22.85 GFLOPS | Progress: (12/20) | 11.22 s
    [Task  1/25]  Current/Best:   16.80/  22.87 GFLOPS | Progress: (16/20) | 12.88 s
    [Task  1/25]  Current/Best:   11.64/  23.94 GFLOPS | Progress: (20/20) | 14.58 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.33/  12.89 GFLOPS | Progress: (4/20) | 3.79 s
    [Task  2/25]  Current/Best:   13.96/  18.35 GFLOPS | Progress: (8/20) | 5.06 s
    [Task  2/25]  Current/Best:   21.30/  21.30 GFLOPS | Progress: (12/20) | 6.38 s
    [Task  2/25]  Current/Best:   12.68/  21.30 GFLOPS | Progress: (16/20) | 7.66 s
    [Task  2/25]  Current/Best:   20.15/  21.30 GFLOPS | Progress: (20/20) | 9.26 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.59 GFLOPS | Progress: (4/20) | 5.74 s
    [Task  3/25]  Current/Best:   15.56/  16.75 GFLOPS | Progress: (8/20) | 7.64 s
    [Task  3/25]  Current/Best:   14.92/  16.75 GFLOPS | Progress: (12/20) | 9.32 s
    [Task  3/25]  Current/Best:    7.20/  23.74 GFLOPS | Progress: (16/20) | 11.23 s
    [Task  3/25]  Current/Best:   12.11/  23.74 GFLOPS | Progress: (20/20) | 15.74 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.54/  20.48 GFLOPS | Progress: (4/20) | 2.27 s
    [Task  4/25]  Current/Best:    6.73/  20.48 GFLOPS | Progress: (8/20) | 7.00 s
    [Task  4/25]  Current/Best:   22.44/  22.44 GFLOPS | Progress: (12/20) | 11.95 s
    [Task  4/25]  Current/Best:   17.01/  22.44 GFLOPS | Progress: (16/20) | 14.34 s
    [Task  4/25]  Current/Best:   13.37/  22.44 GFLOPS | Progress: (20/20) | 16.30 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.60/  10.37 GFLOPS | Progress: (4/20) | 2.48 s
    [Task  5/25]  Current/Best:   11.92/  12.77 GFLOPS | Progress: (8/20) | 4.55 s
    [Task  5/25]  Current/Best:   11.86/  18.07 GFLOPS | Progress: (12/20) | 7.70 s
    [Task  5/25]  Current/Best:   11.96/  22.69 GFLOPS | Progress: (16/20) | 9.10 s
    [Task  5/25]  Current/Best:   12.15/  22.69 GFLOPS | Progress: (20/20) | 10.98 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.22/  20.71 GFLOPS | Progress: (4/20) | 4.03 s
    [Task  6/25]  Current/Best:   19.06/  20.71 GFLOPS | Progress: (8/20) | 5.76 s
    [Task  6/25]  Current/Best:   13.28/  20.71 GFLOPS | Progress: (12/20) | 7.69 s
    [Task  6/25]  Current/Best:   19.99/  20.71 GFLOPS | Progress: (16/20) | 9.94 s
    [Task  6/25]  Current/Best:    3.74/  20.71 GFLOPS | Progress: (20/20) | 12.48 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.28/  12.81 GFLOPS | Progress: (4/20) | 3.42 s
    [Task  7/25]  Current/Best:   20.31/  21.17 GFLOPS | Progress: (8/20) | 4.90 s
    [Task  7/25]  Current/Best:   16.18/  21.17 GFLOPS | Progress: (12/20) | 6.77 s
    [Task  7/25]  Current/Best:   12.27/  21.17 GFLOPS | Progress: (16/20) | 8.80 s
    [Task  7/25]  Current/Best:    6.40/  21.79 GFLOPS | Progress: (20/20) | 11.23 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    9.86/  14.05 GFLOPS | Progress: (4/20) | 2.80 s
    [Task  8/25]  Current/Best:    9.62/  14.05 GFLOPS | Progress: (8/20) | 7.93 s
    [Task  8/25]  Current/Best:   12.65/  14.05 GFLOPS | Progress: (12/20) | 14.40 s
    [Task  8/25]  Current/Best:   18.84/  18.84 GFLOPS | Progress: (16/20) | 16.49 s
    [Task  8/25]  Current/Best:   19.87/  19.87 GFLOPS | Progress: (20/20) | 23.62 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.38/  15.75 GFLOPS | Progress: (4/20) | 18.75 s
    [Task  9/25]  Current/Best:   23.53/  23.53 GFLOPS | Progress: (8/20) | 20.41 s
    [Task  9/25]  Current/Best:    8.30/  23.53 GFLOPS | Progress: (12/20) | 22.90 s
    [Task  9/25]  Current/Best:   17.90/  23.53 GFLOPS | Progress: (16/20) | 25.64 s
    [Task  9/25]  Current/Best:    9.07/  23.53 GFLOPS | Progress: (20/20) | 34.26 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.22/  18.22 GFLOPS | Progress: (4/20) | 2.46 s
    [Task 10/25]  Current/Best:   15.48/  18.22 GFLOPS | Progress: (8/20) | 4.06 s
    [Task 10/25]  Current/Best:   12.93/  18.87 GFLOPS | Progress: (12/20) | 5.59 s
    [Task 10/25]  Current/Best:   19.09/  20.47 GFLOPS | Progress: (16/20) | 6.68 s
    [Task 10/25]  Current/Best:    8.80/  20.47 GFLOPS | Progress: (20/20
 ) | 8.22 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.23/  18.11 GFLOPS | Progress: (4/20) | 3.24 s
    [Task 11/25]  Current/Best:   16.79/  18.11 GFLOPS | Progress: (8/20) | 6.04 s
    [Task 11/25]  Current/Best:   18.11/  18.11 GFLOPS | Progress: (12/20) | 8.05 s
    [Task 11/25]  Current/Best:   13.43/  21.21 GFLOPS | Progress: (16/20) | 10.97 s
    [Task 11/25]  Current/Best:   19.56/  21.65 GFLOPS | Progress: (20/20) | 13.06 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.84/  18.04 GFLOPS | Progress: (4/20) | 5.67 s
    [Task 12/25]  Current/Best:    5.29/  18.04 GFLOPS | Progress: (8/20) | 9.57 s
    [Task 12/25]  Current/Best:   18.85/  18.87 GFLOPS | Progress: (12/20) | 11.54 s
    [Task 12/25]  Current/Best:   15.52/  18.87 GFLOPS | Progress: (16/20) | 14.47 s
    [Task 12/25]  Current/Best:   15.15/  18.87 GFLOPS | Progress: (20/20) | 16.41 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.65/  17.24 GFLOPS | Progress: (4/20) | 3.62 s
    [Task 13/25]  Current/Best:   16.14/  21.13 GFLOPS | Progress: (8/20) | 6.19 s
    [Task 13/25]  Current/Best:   19.60/  21.71 GFLOPS | Progress: (12/20) | 9.18 s
    [Task 13/25]  Current/Best:   12.30/  21.71 GFLOPS | Progress: (16/20) | 12.58 s
    [Task 13/25]  Current/Best:   17.64/  21.71 GFLOPS | Progress: (20/20) | 14.91 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.37/  13.37 GFLOPS | Progress: (4/20) | 3.22 s
    [Task 14/25]  Current/Best:    6.10/  13.37 GFLOPS | Progress: (8/20) | 5.42 s
    [Task 14/25]  Current/Best:   21.09/  21.09 GFLOPS | Progress: (12/20) | 8.05 s
    [Task 14/25]  Current/Best:   16.82/  21.09 GFLOPS | Progress: (16/20) | 9.92 s
    [Task 14/25]  Current/Best:   17.34/  21.09 GFLOPS | Progress: (20/20) | 11.68 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
      Done.
-
    [Task 15/25]  Current/Best:   16.11/  17.57 GFLOPS | Progress: (4/20) | 2.54 s
    [Task 15/25]  Current/Best:   14.32/  18.06 GFLOPS | Progress: (8/20) | 4.00 s
    [Task 15/25]  Current/Best:   10.41/  21.99 GFLOPS | Progress: (12/20) | 6.30 s
    [Task 15/25]  Current/Best:   20.43/  21.99 GFLOPS | Progress: (16/20) | 9.43 s
    [Task 15/25]  Current/Best:    9.66/  21.99 GFLOPS | Progress: (20/20) | 10.61 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.41/  20.41 GFLOPS | Progress: (4/20) | 2.79 s
    [Task 16/25]  Current/Best:    3.04/  20.41 GFLOPS | Progress: (8/20) | 4.38 s
    [Task 16/25]  Current/Best:   19.49/  20.41 GFLOPS | Progress: (12/20) | 5.57 s
    [Task 16/25]  Current/Best:   17.59/  20.41 GFLOPS | Progress: (16/20) | 6.93 s
    [Task 16/25]  Current/Best:   10.04/  22.42 GFLOPS | Progress: (20/20) | 9.08 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   12.89/  18.90 GFLOPS | Progress: (4/20) | 4.72 s
    [Task 17/25]  Current/Best:   14.52/  23.44 GFLOPS | Progress: (8/20) | 7.49 s
    [Task 17/25]  Current/Best:   17.26/  23.44 GFLOPS | Progress: (12/20) | 9.53 s
    [Task 17/25]  Current/Best:   16.58/  23.44 GFLOPS | Progress: (16/20) | 11.73 s
    [Task 17/25]  Current/Best:   10.06/  23.44 GFLOPS | Progress: (20/20) | 13.87 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.36/  17.69 GFLOPS | Progress: (4/20) | 3.67 s
    [Task 18/25]  Current/Best:   10.54/  18.21 GFLOPS | Progress: (8/20) | 7.37 s
    [Task 18/25]  Current/Best:   18.63/  18.63 GFLOPS | Progress: (12/20) | 9.30 s
    [Task 18/25]  Current/Best:   10.14/  18.63 GFLOPS | Progress: (16/20) | 13.17 s
    [Task 18/25]  Current/Best:   20.64/  20.64 GFLOPS | Progress: (20/20) | 14.71 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.29/  20.48 GFLOPS | Progress: (4/20) | 5.95 s
    [Task 19/25]  Current/Best:    2.61/  20.48 GFLOPS | Progress: (8/20) | 9.29 s
    [Task 19/25]  Current/Best:   16.63/  21.78 GFLOPS | Progress: (12/20) | 12.28 s
    [Task 19/25]  Current/Best:   15.24/  21.78 GFLOPS | Progress: (16/20) | 15.37 s
    [Task 19/25]  Current/Best:    2.70/  23.75 GFLOPS | Progress: (20/20) | 18.18 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.46/  15.45 GFLOPS | Progress: (4/20) | 3.23 s
    [Task 20/25]  Current/Best:    9.69/  15.45 GFLOPS | Progress: (8/20) | 6.77 s
    [Task 20/25]  Current/Best:    2.32/  16.48 GFLOPS | Progress: (12/20) | 10.70 s Done.
-
    [Task 20/25]  Current/Best:   12.43/  16.48 GFLOPS | Progress: (16/20) | 14.56 s
    [Task 20/25]  Current/Best:   10.41/  22.05 GFLOPS | Progress: (20/20) | 16.70 s Done.
-
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.42/  17.73 GFLOPS | Progress: (4/20) | 3.17 s
    [Task 21/25]  Current/Best:   14.66/  17.73 GFLOPS | Progress: (8/20) | 4.77 s
    [Task 21/25]  Current/Best:    1.61/  17.73 GFLOPS | Progress: (12/20) | 6.88 s
    [Task 21/25]  Current/Best:   18.05/  18.05 GFLOPS | Progress: (16/20) | 10.33 s
    [Task 21/25]  Current/Best:    4.47/  18.05 GFLOPS | Progress: (20/20) | 17.66 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.71/  16.96 GFLOPS | Progress: (4/20) | 2.58 s
    [Task 22/25]  Current/Best:    8.60/  22.12 GFLOPS | Progress: (8/20) | 4.61 s
    [Task 22/25]  Current/Best:   20.00/  22.12 GFLOPS | Progress: (12/20) | 6.98 s
    [Task 22/25]  Current/Best:   14.75/  22.12 GFLOPS | Progress: (16/20) | 9.10 s
    [Task 22/25]  Current/Best:   13.72/  22.12 GFLOPS | Progress: (20/20) |
  10.82 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.73/  20.93 GFLOPS | Progress: (4/20) | 3.12 s
    [Task 23/25]  Current/Best:   14.12/  20.93 GFLOPS | Progress: (8/20) | 6.51 s
    [Task 23/25]  Current/Best:   21.00/  21.84 GFLOPS | Progress: (12/20) | 8.31 s
    [Task 23/25]  Current/Best:    6.45/  21.84 GFLOPS | Progress: (16/20) | 15.35 s
    [Task 23/25]  Current/Best:    8.00/  21.84 GFLOPS | Progress: (20/20) | 19.53 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.54/   8.54 GFLOPS | Progress: (4/20) | 14.15 s
    [Task 24/25]  Current/Best:    3.72/   8.54 GFLOPS | Progress: (8/20) | 30.21 s
    [Task 24/25]  Current/Best:    4.31/   8.54 GFLOPS | Progress: (12/20) | 54.06 s
    [Task 24/25]  Current/Best:    5.76/   8.78 GFLOPS | Progress: (16/20) | 59.71 s Done.
-
    [Task 24/25]  Current/Best:    3.34/   8.78 GFLOPS | Progress: (20/20) | 65.75 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.77 GFLOPS | Progress: (4/20) | 32.49 s
    [Task 25/25]  Current/Best:    5.94/   8.12 GFLOPS | Progress: (8/20) | 323.29 s
    [Task 25/25]  Current/Best:    6.01/   8.12 GFLOPS | Progress: (12/20) | 351.62 s
    [Task 25/25]  Current/Best:    5.78/   9.06 GFLOPS | Progress: (16/20) | 353.42 s
    [Task 25/25]  Current/Best:    2.85/   9.35 GFLOPS | Progress: (20/20) | 373.32 s
+
    [Task 15/25]  Current/Best:   16.19/  17.62 GFLOPS | Progress: (4/20) | 2.55 s
    [Task 15/25]  Current/Best:   14.46/  18.11 GFLOPS | Progress: (8/20) | 4.05 s
    [Task 15/25]  Current/Best:   10.33/  22.37 GFLOPS | Progress: (12/20) | 6.39 s
    [Task 15/25]  Current/Best:   20.42/  22.37 GFLOPS | Progress: (16/20) | 9.56 s
    [Task 15/25]  Current/Best:    9.65/  22.37 GFLOPS | Progress: (20/20) | 10.73 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.63/  20.63 GFLOPS | Progress: (4/20) | 2.80 s
    [Task 16/25]  Current/Best:    3.05/  20.63 GFLOPS | Progress: (8/20) | 4.39 s
    [Task 16/25]  Current/Best:   19.53/  20.63 GFLOPS | Progress: (12/20) | 5.58 s
    [Task 16/25]  Current/Best:   17.87/  20.63 GFLOPS | Progress: (16/20) | 6.95 s
    [Task 16/25]  Current/Best:   10.05/  22.33 GFLOPS | Progress: (20/20) | 9.08 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.16/  16.62 GFLOPS | Progress: (4/20) | 4.74 s
    [Task 17/25]  Current/Best:   14.40/  23.39 GFLOPS | Progress: (8/20) | 7.58 s
    [Task 17/25]  Current/Best:   16.88/  23.39 GFLOPS | Progress: (12/20) | 9.62 s
    [Task 17/25]  Current/Best:   16.55/  23.39 GFLOPS | Progress: (16/20) | 11.80 s
    [Task 17/25]  Current/Best:   10.05/  23.39 GFLOPS | Progress: (20/20) | 13.93 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.35/  18.18 GFLOPS | Progress: (4/20) | 3.68 s
    [Task 18/25]  Current/Best:   10.58/  19.37 GFLOPS | Progress: (8/20) | 7.31 s
    [Task 18/25]  Current/Best:   19.05/  19.37 GFLOPS | Progress: (12/20) | 9.21 s
    [Task 18/25]  Current/Best:   10.08/  19.37 GFLOPS | Progress: (16/20) | 13.07 s
    [Task 18/25]  Current/Best:   20.52/  20.52 GFLOPS | Progress: (20/20) | 14.56 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.29/  20.51 GFLOPS | Progress: (4/20) | 5.96 s
    [Task 19/25]  Current/Best:    2.61/  20.51 GFLOPS | Progress: (8/20) | 9.31 s
    [Task 19/25]  Current/Best:   20.47/  22.10 GFLOPS | Progress: (12/20) | 12.32 s
    [Task 19/25]  Current/Best:   14.40/  22.10 GFLOPS | Progress: (16/20) | 15.38 s
    [Task 19/25]  Current/Best:    2.71/  23.85 GFLOPS | Progress: (20/20) | 18.19 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    8.47/  15.20 GFLOPS | Progress: (4/20) | 3.25 s
    [Task 20/25]  Current/Best:    9.74/  15.20 GFLOPS | Progress: (8/20) | 6.78 s
    [Task 20/25]  Current/Best:    2.32/  16.54 GFLOPS | Progress: (12/20) | 10.69 s Done.
+
    [Task 20/25]  Current/Best:   12.34/  16.54 GFLOPS | Progress: (16/20) | 14.39 s
    [Task 20/25]  Current/Best:   11.79/  22.35 GFLOPS | Progress: (20/20) | 16.48 s Done.
+
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.43/  17.60 GFLOPS | Progress: (4/20) | 3.16 s
    [Task 21/25]  Current/Best:   14.67/  17.60 GFLOPS | Progress: (8/20) | 4.74 s
    [Task 21/25]  Current/Best:    1.61/  17.60 GFLOPS | Progress: (12/20) | 6.81 s
    [Task 21/25]  Current/Best:   18.03/  18.03 GFLOPS | Progress: (16/20) | 10.26 s
    [Task 21/25]  Current/Best:    4.46/  18.03 GFLOPS | Progress: (20/20) | 17.49 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.71/  16.95 GFLOPS | Progress: (4/20) | 2.56 s
    [Task 22/25]  Current/Best:    8.62/  21.89 GFLOPS | Progress: (8/20) | 4.53 s
    [Task 22/25]  Current/Best:   20.12/  21.89 GFLOPS | Progress: (12/20) | 6.91 s
    [Task 22/25]  Current/Best:   14.81/  21.89 GFLOPS | Progress: (16/20) | 9.02 s
    [Task 22/25]  Current/Best:   14.07/  21.89 GFLOPS | Progress: (20/20) |
  10.74 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.73/  20.53 GFLOPS | Progress: (4/20) | 3.12 s
    [Task 23/25]  Current/Best:   13.50/  20.53 GFLOPS | Progress: (8/20) | 6.41 s
    [Task 23/25]  Current/Best:   20.96/  21.68 GFLOPS | Progress: (12/20) | 8.23 s
    [Task 23/25]  Current/Best:    6.50/  21.68 GFLOPS | Progress: (16/20) | 15.23 s
    [Task 23/25]  Current/Best:    7.91/  21.68 GFLOPS | Progress: (20/20) | 19.42 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.64/   8.64 GFLOPS | Progress: (4/20) | 13.64 s
    [Task 24/25]  Current/Best:    2.15/   8.64 GFLOPS | Progress: (8/20) | 30.68 s
    [Task 24/25]  Current/Best:    4.54/   8.64 GFLOPS | Progress: (12/20) | 54.24 s
    [Task 24/25]  Current/Best:    6.37/   9.01 GFLOPS | Progress: (16/20) | 59.88 s Done.
+
    [Task 24/25]  Current/Best:    3.45/   9.01 GFLOPS | Progress: (20/20) | 65.92 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.79 GFLOPS | Progress: (4/20) | 32.28 s
    [Task 25/25]  Current/Best:    6.40/   8.48 GFLOPS | Progress: (8/20) | 318.70 s
    [Task 25/25]  Current/Best:    5.95/   8.48 GFLOPS | Progress: (12/20) | 347.63 s
    [Task 25/25]  Current/Best:    5.95/   8.93 GFLOPS | Progress: (16/20) | 349.33 s
    [Task 25/25]  Current/Best:    2.89/   9.39 GFLOPS | Progress: (20/20) | 369.17 s
 
 
 The output from this tuning process will look something like this:
@@ -651,8 +651,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 409.65435204000187, 'median': 409.6145139500095, 'std': 0.5035102469973483}
-    unoptimized: {'mean': 492.0485737900015, 'median': 491.6691768000021, 'std': 0.8640093566493087}
+    optimized: {'mean': 407.073206069997, 'median': 407.2628067499977, 'std': 0.6441496962374219}
+    unoptimized: {'mean': 490.8931876000008, 'median': 490.8230106499957, 'std': 0.5127112764728715}
 
 
 
@@ -672,7 +672,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 16 minutes  10.276 seconds)
+   **Total running time of the script:** ( 16 minutes  4.879 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 0e11edbf1..8066ec160 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -235,7 +235,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.298e-07 secs/op
+    1.309e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index ea1810aed..f0f50dc80 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -233,7 +233,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x22b57700)), stage(b, placeholder(b, 0x12236330)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
+    [stage(a, placeholder(a, 0xd0f2380)), stage(b, placeholder(b, 0x20c37150)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index 91af2d44d..01b031a62 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,17 +5,17 @@
 
 Computation times
 =================
-**18:55.209** total execution time for **tutorial** files:
+**19:07.558** total execution time for **tutorial** files:
 
-- **16:10.276**: :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)
-- **01:00.330**: :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
-- **00:50.794**: :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``)
-- **00:25.903**: :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)
-- **00:25.518**: :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)
-- **00:01.369**: :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)
-- **00:00.704**: :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)
-- **00:00.190**: :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
-- **00:00.032**: :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)
-- **00:00.031**: :ref:`sphx_glr_tutorial_install.py` (``install.py``)
-- **00:00.030**: :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)
-- **00:00.030**: :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
+- **16:04.879**: :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)
+- **01:11.014**: :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``)
+- **01:00.347**: :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
+- **00:25.407**: :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)
+- **00:23.610**: :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)
+- **00:01.315**: :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)
+- **00:00.690**: :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)
+- **00:00.182**: :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:00.030**: :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)
+- **00:00.029**: :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
+- **00:00.028**: :ref:`sphx_glr_tutorial_install.py` (``install.py``)
+- **00:00.027**: :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 62fb5f5c1..94412b35c 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -243,8 +243,8 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000007
-    naive: 0.000008
+    Numpy running time: 0.000008
+    naive: 0.000006
 
 
 
@@ -438,10 +438,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    7.436289999986911e-06                    1.0
-                   naive              7.6159e-06      1.0241531731566957
-                parallel              6.0648e-06      0.8155679781195562
-                  vector             2.46243e-05      3.3113689756643896
+                   numpy    8.178099999440747e-06                    1.0
+                   naive              5.8726e-06      0.7180885536251199
+                parallel              6.0821e-06      0.7437057507753536
+                  vector             2.45371e-05      3.0003423780190936
 
 
 
@@ -830,7 +830,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.017948
+    Numpy running time: 0.017574
 
 
 
@@ -886,7 +886,7 @@ optimizations.
 
  .. code-block:: none
 
-    none: 3.396838
+    none: 3.388776
 
 
 
@@ -985,7 +985,7 @@ schedule.
 
  .. code-block:: none
 
-    blocking: 0.291171
+    blocking: 0.299878
 
 
 
@@ -1077,7 +1077,7 @@ already cache friendly from our previous optimizations.
 
  .. code-block:: none
 
-    vectorization: 0.323090
+    vectorization: 0.331291
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1149,7 +1149,7 @@ more cache friendly.
 
  .. code-block:: none
 
-    loop permutation: 0.116254
+    loop permutation: 0.113738
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1246,7 +1246,7 @@ optimized schedule.
 
  .. code-block:: none
 
-    array packing: 0.110528
+    array packing: 0.108084
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1337,7 +1337,7 @@ to `C` when all the block results are ready.
 
  .. code-block:: none
 
-    block caching: 0.110834
+    block caching: 0.111185
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1421,7 +1421,7 @@ of thread-level parallelization.
 
  .. code-block:: none
 
-    parallelization: 0.144285
+    parallelization: 0.144311
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1500,13 +1500,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none      3.3968376278999997                     1.0
-                blocking            0.2911707229     0.08571817519579475
-           vectorization             0.323090222     0.09511500324486853
-        loop permutation     0.11625381580000001     0.03422413095202042
-           array packing            0.1105277781     0.03253843433438728
-           block caching     0.11083402419999999     0.03262859057190792
-         parallelization            0.1442845978     0.04247615388351663
+                    none      3.3887764241999996                     1.0
+                blocking            0.2998780702     0.08849154758587925
+           vectorization            0.3312914261     0.09776137007274216
+        loop permutation     0.11373797159999999    0.033563138242987074
+           array packing             0.108083941     0.03189468040091071
+           block caching     0.11118478700000001     0.03280971450521342
+         parallelization             0.144311289     0.04258507229023469
 
 
 
@@ -1543,7 +1543,7 @@ the computation for specific platforms.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  0.330 seconds)
+   **Total running time of the script:** ( 1 minutes  0.347 seconds)
 
 
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
diff --git a/docs/commit_hash b/docs/commit_hash
index 3a3b1f81f..5d218762a 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-a3a4155943cd1a8ced35060902907cde2ba44cd8
+de21c8f2ef507587fdcc99b851404de5aeeb5a16
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 3a64bb0cb..c0696ad47 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -401,7 +401,7 @@
 </div>
 <img alt="../../_images/sphx_glr_from_mxnet_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_from_mxnet_001.png" />
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip2e9fcbdd-04fb-48d4-9551-2fca667db007 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip241648fa-9037-4f2c-98f8-146ee42e6cc7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index a7f3ab72e..fe16a238f 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -406,49 +406,1836 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
-  0%|          | 16.0k/41.5M [00:00&lt;07:45, 93.5kB/s]
-  0%|          | 48.0k/41.5M [00:00&lt;04:53, 148kB/s]
-  0%|          | 96.0k/41.5M [00:00&lt;03:28, 208kB/s]
-  0%|          | 168k/41.5M [00:00&lt;02:28, 291kB/s]
-  1%|          | 336k/41.5M [00:00&lt;01:20, 538kB/s]
-  2%|1         | 648k/41.5M [00:01&lt;00:44, 971kB/s]
-  3%|3         | 1.27M/41.5M [00:01&lt;00:22, 1.88MB/s]
-  6%|6         | 2.53M/41.5M [00:01&lt;00:11, 3.68MB/s]
- 10%|9         | 4.03M/41.5M [00:01&lt;00:07, 5.32MB/s]
- 13%|#3        | 5.52M/41.5M [00:01&lt;00:05, 7.14MB/s]
- 16%|#5        | 6.59M/41.5M [00:01&lt;00:04, 8.01MB/s]
- 18%|#7        | 7.44M/41.5M [00:01&lt;00:04, 7.54MB/s]
- 20%|#9        | 8.21M/41.5M [00:02&lt;00:05, 6.01MB/s]
- 23%|##2       | 9.52M/41.5M [00:02&lt;00:05, 6.58MB/s]
- 25%|##4       | 10.3M/41.5M [00:02&lt;00:05, 6.42MB/s]
- 28%|##8       | 11.8M/41.5M [00:02&lt;00:04, 7.23MB/s]
- 32%|###1      | 13.3M/41.5M [00:02&lt;00:03, 7.76MB/s]
- 35%|###4      | 14.4M/41.5M [00:03&lt;00:04, 6.95MB/s]
- 38%|###8      | 15.9M/41.5M [00:03&lt;00:03, 7.52MB/s]
- 40%|####      | 16.7M/41.5M [00:03&lt;00:03, 6.65MB/s]
- 44%|####3     | 18.1M/41.5M [00:03&lt;00:03, 7.24MB/s]
- 47%|####7     | 19.6M/41.5M [00:03&lt;00:02, 7.71MB/s]
- 51%|#####     | 21.1M/41.5M [00:03&lt;00:02, 8.07MB/s]
- 55%|#####4    | 22.6M/41.5M [00:04&lt;00:02, 8.32MB/s]
- 58%|#####8    | 24.1M/41.5M [00:04&lt;00:01, 9.53MB/s]
- 62%|######1   | 25.6M/41.5M [00:04&lt;00:01, 10.2MB/s]
- 64%|######4   | 26.6M/41.5M [00:04&lt;00:02, 7.20MB/s]
- 67%|######6   | 27.6M/41.5M [00:04&lt;00:02, 6.90MB/s]
- 68%|######8   | 28.4M/41.5M [00:04&lt;00:02, 6.23MB/s]
- 71%|#######1  | 29.5M/41.5M [00:05&lt;00:02, 6.27MB/s]
- 74%|#######3  | 30.6M/41.5M [00:05&lt;00:01, 6.32MB/s]
- 76%|#######6  | 31.7M/41.5M [00:05&lt;00:01, 6.39MB/s]
- 79%|#######9  | 32.8M/41.5M [00:05&lt;00:01, 6.95MB/s]
- 82%|########1 | 33.9M/41.5M [00:05&lt;00:01, 7.26MB/s]
- 84%|########4 | 35.0M/41.5M [00:05&lt;00:00, 7.11MB/s]
- 87%|########7 | 36.2M/41.5M [00:06&lt;00:00, 7.65MB/s]
- 89%|########9 | 36.9M/41.5M [00:06&lt;00:00, 7.29MB/s]
- 91%|######### | 37.7M/41.5M [00:06&lt;00:00, 6.29MB/s]
- 93%|#########2| 38.5M/41.5M [00:06&lt;00:00, 5.96MB/s]
- 94%|#########4| 39.1M/41.5M [00:06&lt;00:00, 5.12MB/s]
- 97%|#########6| 40.1M/41.5M [00:06&lt;00:00, 5.32MB/s]
- 99%|#########8| 40.9M/41.5M [00:07&lt;00:00, 5.20MB/s]
-100%|##########| 41.5M/41.5M [00:07&lt;00:00, 6.12MB/s]
+  0%|          | 16.0k/41.5M [00:00&lt;15:35, 46.5kB/s]
+  0%|          | 56.0k/41.5M [00:00&lt;05:50, 124kB/s]
+  0%|          | 72.0k/41.5M [00:00&lt;06:26, 112kB/s]
+  0%|          | 88.0k/41.5M [00:00&lt;06:51, 105kB/s]
+  0%|          | 104k/41.5M [00:01&lt;07:08, 101kB/s]
+  0%|          | 120k/41.5M [00:01&lt;07:20, 98.5kB/s]
+  0%|          | 136k/41.5M [00:01&lt;07:28, 96.7kB/s]
+  0%|          | 152k/41.5M [00:01&lt;07:33, 95.5kB/s]
+  0%|          | 168k/41.5M [00:01&lt;07:37, 94.6kB/s]
+  0%|          | 184k/41.5M [00:01&lt;07:40, 94.1kB/s]
+  0%|          | 200k/41.5M [00:02&lt;10:02, 71.9kB/s]
+  1%|          | 216k/41.5M [00:02&lt;11:41, 61.7kB/s]
+  1%|          | 240k/41.5M [00:02&lt;09:08, 78.9kB/s]
+  1%|          | 256k/41.5M [00:03&lt;15:05, 47.7kB/s]
+  1%|          | 264k/41.5M [00:03&lt;15:09, 47.5kB/s]
+  1%|          | 272k/41.5M [00:03&lt;15:14, 47.3kB/s]
+  1%|          | 280k/41.5M [00:04&lt;15:17, 47.1kB/s]
+  1%|          | 288k/41.5M [00:04&lt;15:20, 46.9kB/s]
+  1%|          | 296k/41.5M [00:04&lt;15:23, 46.8kB/s]
+  1%|          | 304k/41.5M [00:04&lt;15:25, 46.7kB/s]
+  1%|          | 312k/41.5M [00:04&lt;15:26, 46.6kB/s]
+  1%|          | 320k/41.5M [00:04&lt;16:28, 43.7kB/s]
+  1%|          | 328k/41.5M [00:05&lt;19:36, 36.7kB/s]
+  1%|          | 336k/41.5M [00:05&lt;18:24, 39.1kB/s]
+  1%|          | 352k/41.5M [00:05&lt;14:23, 50.0kB/s]
+  1%|          | 360k/41.5M [00:06&lt;21:21, 33.7kB/s]
+  1%|          | 376k/41.5M [00:06&lt;20:26, 35.2kB/s]
+  1%|          | 384k/41.5M [00:06&lt;19:18, 37.2kB/s]
+  1%|          | 392k/41.5M [00:07&lt;21:16, 33.8kB/s]
+  1%|          | 400k/41.5M [00:07&lt;23:31, 30.5kB/s]
+  1%|          | 408k/41.5M [00:07&lt;21:37, 33.2kB/s]
+  1%|          | 416k/41.5M [00:07&lt;23:56, 30.0kB/s]
+  1%|          | 424k/41.5M [00:08&lt;21:31, 33.3kB/s]
+  1%|1         | 432k/41.5M [00:08&lt;19:46, 36.3kB/s]
+  1%|1         | 440k/41.5M [00:08&lt;23:02, 31.1kB/s]
+  1%|1         | 456k/41.5M [00:08&lt;18:05, 39.6kB/s]
+  1%|1         | 464k/41.5M [00:09&lt;19:03, 37.6kB/s]
+  1%|1         | 472k/41.5M [00:09&lt;18:07, 39.6kB/s]
+  1%|1         | 480k/41.5M [00:09&lt;17:23, 41.2kB/s]
+  1%|1         | 488k/41.5M [00:09&lt;16:51, 42.5kB/s]
+  1%|1         | 496k/41.5M [00:09&lt;16:27, 43.6kB/s]
+  1%|1         | 504k/41.5M [00:10&lt;14:15, 50.3kB/s]
+  1%|1         | 512k/41.5M [00:10&lt;14:36, 49.1kB/s]
+  1%|1         | 528k/41.5M [00:10&lt;12:57, 55.2kB/s]
+  1%|1         | 536k/41.5M [00:10&lt;13:34, 52.8kB/s]
+  1%|1         | 544k/41.5M [00:10&lt;13:18, 53.8kB/s]
+  1%|1         | 560k/41.5M [00:10&lt;11:31, 62.0kB/s]
+  1%|1         | 568k/41.5M [00:11&lt;11:46, 60.8kB/s]
+  1%|1         | 584k/41.5M [00:11&lt;10:42, 66.7kB/s]
+  1%|1         | 592k/41.5M [00:11&lt;11:07, 64.3kB/s]
+  1%|1         | 608k/41.5M [00:11&lt;09:47, 73.0kB/s]
+  1%|1         | 624k/41.5M [00:11&lt;09:02, 79.0kB/s]
+  2%|1         | 640k/41.5M [00:12&lt;08:35, 83.2kB/s]
+  2%|1         | 656k/41.5M [00:12&lt;08:17, 86.1kB/s]
+  2%|1         | 672k/41.5M [00:12&lt;08:06, 88.1kB/s]
+  2%|1         | 688k/41.5M [00:12&lt;07:58, 89.5kB/s]
+  2%|1         | 704k/41.5M [00:12&lt;10:15, 69.6kB/s]
+  2%|1         | 736k/41.5M [00:13&lt;07:35, 93.9kB/s]
+  2%|1         | 752k/41.5M [00:13&lt;07:36, 93.5kB/s]
+  2%|1         | 768k/41.5M [00:13&lt;07:37, 93.3kB/s]
+  2%|1         | 784k/41.5M [00:13&lt;07:38, 93.2kB/s]
+  2%|1         | 800k/41.5M [00:13&lt;07:38, 93.1kB/s]
+  2%|1         | 816k/41.5M [00:13&lt;07:38, 93.0kB/s]
+  2%|1         | 832k/41.5M [00:14&lt;07:13, 98.4kB/s]
+  2%|1         | 848k/41.5M [00:14&lt;09:35, 74.1kB/s]
+  2%|2         | 880k/41.5M [00:14&lt;07:17, 97.3kB/s]
+  2%|2         | 896k/41.5M [00:14&lt;07:23, 95.9kB/s]
+  2%|2         | 912k/41.5M [00:15&lt;07:27, 95.1kB/s]
+  2%|2         | 928k/41.5M [00:15&lt;07:29, 94.7kB/s]
+  2%|2         | 944k/41.5M [00:15&lt;07:31, 94.1kB/s]
+  2%|2         | 960k/41.5M [00:15&lt;11:56, 59.4kB/s]
+  2%|2         | 0.98M/41.5M [00:16&lt;07:27, 95.0kB/s]
+  2%|2         | 0.99M/41.5M [00:16&lt;07:29, 94.5kB/s]
+  2%|2         | 1.01M/41.5M [00:16&lt;09:22, 75.5kB/s]
+  2%|2         | 1.03M/41.5M [00:16&lt;07:53, 89.5kB/s]
+  3%|2         | 1.05M/41.5M [00:16&lt;07:49, 90.3kB/s]
+  3%|2         | 1.06M/41.5M [00:17&lt;07:46, 90.9kB/s]
+  3%|2         | 1.08M/41.5M [00:17&lt;07:43, 91.4kB/s]
+  3%|2         | 1.09M/41.5M [00:17&lt;09:48, 71.9kB/s]
+  3%|2         | 1.11M/41.5M [00:17&lt;09:41, 72.8kB/s]
+  3%|2         | 1.12M/41.5M [00:18&lt;10:01, 70.4kB/s]
+  3%|2         | 1.13M/41.5M [00:18&lt;09:13, 76.4kB/s]
+  3%|2         | 1.14M/41.5M [00:18&lt;10:21, 68.1kB/s]
+  3%|2         | 1.16M/41.5M [00:18&lt;09:23, 75.1kB/s]
+  3%|2         | 1.17M/41.5M [00:18&lt;08:47, 80.2kB/s]
+  3%|2         | 1.18M/41.5M [00:18&lt;10:01, 70.3kB/s]
+  3%|2         | 1.20M/41.5M [00:19&lt;11:50, 59.4kB/s]
+  3%|2         | 1.20M/41.5M [00:19&lt;12:31, 56.2kB/s]
+  3%|2         | 1.23M/41.5M [00:19&lt;09:03, 77.7kB/s]
+  3%|2         | 1.24M/41.5M [00:19&lt;09:10, 76.7kB/s]
+  3%|3         | 1.25M/41.5M [00:19&lt;09:34, 73.4kB/s]
+  3%|3         | 1.27M/41.5M [00:20&lt;09:30, 73.9kB/s]
+  3%|3         | 1.27M/41.5M [00:20&lt;09:54, 71.0kB/s]
+  3%|3         | 1.29M/41.5M [00:20&lt;09:04, 77.5kB/s]
+  3%|3         | 1.30M/41.5M [00:20&lt;08:33, 82.1kB/s]
+  3%|3         | 1.31M/41.5M [00:20&lt;10:33, 66.5kB/s]
+  3%|3         | 1.33M/41.5M [00:21&lt;09:29, 74.0kB/s]
+  3%|3         | 1.34M/41.5M [00:21&lt;09:27, 74.2kB/s]
+  3%|3         | 1.35M/41.5M [00:21&lt;09:50, 71.3kB/s]
+  3%|3         | 1.36M/41.5M [00:21&lt;10:59, 63.8kB/s]
+  3%|3         | 1.38M/41.5M [00:21&lt;09:40, 72.5kB/s]
+  3%|3         | 1.39M/41.5M [00:21&lt;08:55, 78.6kB/s]
+  3%|3         | 1.41M/41.5M [00:22&lt;08:27, 82.8kB/s]
+  3%|3         | 1.42M/41.5M [00:22&lt;08:09, 85.8kB/s]
+  3%|3         | 1.44M/41.5M [00:22&lt;07:57, 87.9kB/s]
+  4%|3         | 1.45M/41.5M [00:22&lt;07:49, 89.3kB/s]
+  4%|3         | 1.47M/41.5M [00:23&lt;10:03, 69.5kB/s]
+  4%|3         | 1.48M/41.5M [00:23&lt;10:58, 63.7kB/s]
+  4%|3         | 1.48M/41.5M [00:23&lt;14:54, 46.9kB/s]
+  4%|3         | 1.51M/41.5M [00:23&lt;12:37, 55.3kB/s]
+  4%|3         | 1.52M/41.5M [00:24&lt;13:04, 53.5kB/s]
+  4%|3         | 1.52M/41.5M [00:24&lt;13:28, 51.8kB/s]
+  4%|3         | 1.53M/41.5M [00:24&lt;13:50, 50.5kB/s]
+  4%|3         | 1.54M/41.5M [00:24&lt;14:07, 49.4kB/s]
+  4%|3         | 1.55M/41.5M [00:24&lt;12:09, 57.4kB/s]
+  4%|3         | 1.56M/41.5M [00:25&lt;18:49, 37.1kB/s]
+  4%|3         | 1.59M/41.5M [00:25&lt;13:12, 52.8kB/s]
+  4%|3         | 1.59M/41.5M [00:25&lt;12:51, 54.2kB/s]
+  4%|3         | 1.60M/41.5M [00:25&lt;13:19, 52.3kB/s]
+  4%|3         | 1.61M/41.5M [00:26&lt;17:13, 40.5kB/s]
+  4%|3         | 1.62M/41.5M [00:26&lt;17:04, 40.8kB/s]
+  4%|3         | 1.64M/41.5M [00:27&lt;16:19, 42.7kB/s]
+  4%|3         | 1.65M/41.5M [00:27&lt;16:02, 43.4kB/s]
+  4%|3         | 1.66M/41.5M [00:27&lt;15:48, 44.0kB/s]
+  4%|4         | 1.66M/41.5M [00:27&lt;15:36, 44.6kB/s]
+  4%|4         | 1.67M/41.5M [00:27&lt;15:27, 45.0kB/s]
+  4%|4         | 1.68M/41.5M [00:27&lt;15:19, 45.4kB/s]
+  4%|4         | 1.69M/41.5M [00:28&lt;19:25, 35.8kB/s]
+  4%|4         | 1.70M/41.5M [00:28&lt;18:43, 37.1kB/s]
+  4%|4         | 1.72M/41.5M [00:28&lt;14:21, 48.4kB/s]
+  4%|4         | 1.73M/41.5M [00:29&lt;17:44, 39.2kB/s]
+  4%|4         | 1.73M/41.5M [00:29&lt;17:05, 40.7kB/s]
+  4%|4         | 1.74M/41.5M [00:29&lt;16:33, 42.0kB/s]
+  4%|4         | 1.75M/41.5M [00:29&lt;16:07, 43.0kB/s]
+  4%|4         | 1.76M/41.5M [00:29&lt;15:48, 43.9kB/s]
+  4%|4         | 1.77M/41.5M [00:30&lt;16:35, 41.9kB/s]
+  4%|4         | 1.77M/41.5M [00:30&lt;15:05, 46.0kB/s]
+  4%|4         | 1.78M/41.5M [00:30&lt;15:02, 46.1kB/s]
+  4%|4         | 1.79M/41.5M [00:30&lt;15:01, 46.2kB/s]
+  4%|4         | 1.80M/41.5M [00:30&lt;14:59, 46.3kB/s]
+  4%|4         | 1.81M/41.5M [00:30&lt;11:33, 60.0kB/s]
+  4%|4         | 1.82M/41.5M [00:31&lt;12:23, 56.0kB/s]
+  4%|4         | 1.83M/41.5M [00:31&lt;13:03, 53.1kB/s]
+  4%|4         | 1.84M/41.5M [00:31&lt;10:40, 64.9kB/s]
+  4%|4         | 1.85M/41.5M [00:31&lt;11:39, 59.4kB/s]
+  5%|4         | 1.87M/41.5M [00:31&lt;09:58, 69.4kB/s]
+  5%|4         | 1.88M/41.5M [00:32&lt;09:03, 76.3kB/s]
+  5%|4         | 1.89M/41.5M [00:32&lt;10:16, 67.4kB/s]
+  5%|4         | 1.91M/41.5M [00:32&lt;09:13, 75.0kB/s]
+  5%|4         | 1.92M/41.5M [00:32&lt;08:36, 80.3kB/s]
+  5%|4         | 1.93M/41.5M [00:32&lt;09:51, 70.2kB/s]
+  5%|4         | 1.95M/41.5M [00:32&lt;08:59, 76.8kB/s]
+  5%|4         | 1.96M/41.5M [00:33&lt;08:27, 81.6kB/s]
+  5%|4         | 1.98M/41.5M [00:33&lt;10:33, 65.4kB/s]
+  5%|4         | 2.00M/41.5M [00:33&lt;08:15, 83.6kB/s]
+  5%|4         | 2.02M/41.5M [00:33&lt;08:01, 86.0kB/s]
+  5%|4         | 2.03M/41.5M [00:33&lt;07:51, 87.8kB/s]
+  5%|4         | 2.05M/41.5M [00:34&lt;07:43, 89.2kB/s]
+  5%|4         | 2.06M/41.5M [00:34&lt;07:38, 90.2kB/s]
+  5%|5         | 2.09M/41.5M [00:34&lt;06:36, 104kB/s]
+  5%|5         | 2.10M/41.5M [00:34&lt;06:49, 101kB/s]
+  5%|5         | 2.12M/41.5M [00:34&lt;06:58, 98.5kB/s]
+  5%|5         | 2.13M/41.5M [00:35&lt;09:12, 74.7kB/s]
+  5%|5         | 2.16M/41.5M [00:35&lt;07:35, 90.6kB/s]
+  5%|5         | 2.17M/41.5M [00:35&lt;07:32, 91.1kB/s]
+  5%|5         | 2.19M/41.5M [00:35&lt;07:30, 91.6kB/s]
+  5%|5         | 2.20M/41.5M [00:35&lt;07:28, 91.9kB/s]
+  5%|5         | 2.22M/41.5M [00:36&lt;07:26, 92.2kB/s]
+  5%|5         | 2.23M/41.5M [00:36&lt;07:25, 92.3kB/s]
+  5%|5         | 2.26M/41.5M [00:36&lt;06:28, 106kB/s]
+  5%|5         | 2.27M/41.5M [00:36&lt;08:41, 78.9kB/s]
+  6%|5         | 2.30M/41.5M [00:36&lt;07:18, 93.6kB/s]
+  6%|5         | 2.31M/41.5M [00:37&lt;07:19, 93.4kB/s]
+  6%|5         | 2.33M/41.5M [00:37&lt;07:20, 93.2kB/s]
+  6%|5         | 2.34M/41.5M [00:37&lt;07:20, 93.1kB/s]
+  6%|5         | 2.36M/41.5M [00:37&lt;07:21, 93.0kB/s]
+  6%|5         | 2.38M/41.5M [00:37&lt;07:21, 92.9kB/s]
+  6%|5         | 2.39M/41.5M [00:38&lt;07:54, 86.3kB/s]
+  6%|5         | 2.41M/41.5M [00:38&lt;07:12, 94.9kB/s]
+  6%|5         | 2.42M/41.5M [00:38&lt;07:14, 94.2kB/s]
+  6%|5         | 2.44M/41.5M [00:38&lt;08:22, 81.6kB/s]
+  6%|5         | 2.45M/41.5M [00:38&lt;09:09, 74.5kB/s]
+  6%|5         | 2.46M/41.5M [00:39&lt;12:42, 53.6kB/s]
+  6%|5         | 2.47M/41.5M [00:39&lt;12:20, 55.3kB/s]
+  6%|5         | 2.48M/41.5M [00:39&lt;11:13, 60.8kB/s]
+  6%|6         | 2.49M/41.5M [00:39&lt;11:56, 57.1kB/s]
+  6%|6         | 2.50M/41.5M [00:39&lt;11:40, 58.3kB/s]
+  6%|6         | 2.52M/41.5M [00:40&lt;10:43, 63.5kB/s]
+  6%|6         | 2.52M/41.5M [00:40&lt;11:35, 58.7kB/s]
+  6%|6         | 2.53M/41.5M [00:40&lt;11:24, 59.6kB/s]
+  6%|6         | 2.55M/41.5M [00:40&lt;10:32, 64.6kB/s]
+  6%|6         | 2.55M/41.5M [00:40&lt;10:36, 64.2kB/s]
+  6%|6         | 2.57M/41.5M [00:40&lt;09:19, 73.0kB/s]
+  6%|6         | 2.59M/41.5M [00:41&lt;08:36, 79.0kB/s]
+  6%|6         | 2.59M/41.5M [00:41&lt;09:50, 69.1kB/s]
+  6%|6         | 2.61M/41.5M [00:41&lt;11:36, 58.5kB/s]
+  6%|6         | 2.63M/41.5M [00:41&lt;08:38, 78.6kB/s]
+  6%|6         | 2.65M/41.5M [00:42&lt;08:49, 77.0kB/s]
+  6%|6         | 2.66M/41.5M [00:42&lt;09:10, 74.0kB/s]
+  6%|6         | 2.66M/41.5M [00:42&lt;10:15, 66.2kB/s]
+  6%|6         | 2.67M/41.5M [00:42&lt;11:12, 60.6kB/s]
+  6%|6         | 2.69M/41.5M [00:42&lt;12:32, 54.1kB/s]
+  6%|6         | 2.70M/41.5M [00:43&lt;13:53, 48.8kB/s]
+  7%|6         | 2.71M/41.5M [00:43&lt;12:54, 52.5kB/s]
+  7%|6         | 2.72M/41.5M [00:43&lt;13:16, 51.1kB/s]
+  7%|6         | 2.73M/41.5M [00:43&lt;14:15, 47.5kB/s]
+  7%|6         | 2.75M/41.5M [00:44&lt;11:48, 57.4kB/s]
+  7%|6         | 2.76M/41.5M [00:44&lt;12:21, 54.8kB/s]
+  7%|6         | 2.77M/41.5M [00:44&lt;12:51, 52.6kB/s]
+  7%|6         | 2.78M/41.5M [00:44&lt;10:41, 63.3kB/s]
+  7%|6         | 2.79M/41.5M [00:44&lt;11:31, 58.6kB/s]
+  7%|6         | 2.80M/41.5M [00:45&lt;09:54, 68.3kB/s]
+  7%|6         | 2.82M/41.5M [00:45&lt;08:58, 75.3kB/s]
+  7%|6         | 2.83M/41.5M [00:45&lt;10:06, 66.9kB/s]
+  7%|6         | 2.84M/41.5M [00:45&lt;09:04, 74.5kB/s]
+  7%|6         | 2.86M/41.5M [00:45&lt;10:56, 61.7kB/s]
+  7%|6         | 2.88M/41.5M [00:46&lt;08:21, 80.7kB/s]
+  7%|6         | 2.90M/41.5M [00:46&lt;08:02, 83.8kB/s]
+  7%|7         | 2.91M/41.5M [00:46&lt;07:49, 86.2kB/s]
+  7%|7         | 2.93M/41.5M [00:46&lt;07:39, 87.9kB/s]
+  7%|7         | 2.95M/41.5M [00:46&lt;07:32, 89.3kB/s]
+  7%|7         | 2.96M/41.5M [00:47&lt;09:36, 70.1kB/s]
+  7%|7         | 2.98M/41.5M [00:47&lt;07:45, 86.8kB/s]
+  7%|7         | 3.00M/41.5M [00:47&lt;08:04, 83.3kB/s]
+  7%|7         | 3.02M/41.5M [00:47&lt;09:23, 71.6kB/s]
+  7%|7         | 3.03M/41.5M [00:48&lt;09:15, 72.6kB/s]
+  7%|7         | 3.04M/41.5M [00:48&lt;10:08, 66.2kB/s]
+  7%|7         | 3.05M/41.5M [00:48&lt;11:01, 60.9kB/s]
+  7%|7         | 3.07M/41.5M [00:48&lt;09:48, 68.5kB/s]
+  7%|7         | 3.08M/41.5M [00:48&lt;11:17, 59.4kB/s]
+  7%|7         | 3.09M/41.5M [00:49&lt;11:57, 56.1kB/s]
+  7%|7         | 3.09M/41.5M [00:49&lt;12:32, 53.5kB/s]
+  7%|7         | 3.10M/41.5M [00:49&lt;13:00, 51.6kB/s]
+  7%|7         | 3.11M/41.5M [00:49&lt;13:23, 50.1kB/s]
+  8%|7         | 3.12M/41.5M [00:49&lt;13:40, 49.0kB/s]
+  8%|7         | 3.13M/41.5M [00:50&lt;14:00, 47.8kB/s]
+  8%|7         | 3.15M/41.5M [00:50&lt;11:25, 58.7kB/s]
+  8%|7         | 3.16M/41.5M [00:50&lt;12:04, 55.5kB/s]
+  8%|7         | 3.16M/41.5M [00:50&lt;12:35, 53.2kB/s]
+  8%|7         | 3.18M/41.5M [00:51&lt;13:21, 50.1kB/s]
+  8%|7         | 3.20M/41.5M [00:51&lt;11:06, 60.2kB/s]
+  8%|7         | 3.20M/41.5M [00:51&lt;11:47, 56.7kB/s]
+  8%|7         | 3.21M/41.5M [00:51&lt;12:23, 54.0kB/s]
+  8%|7         | 3.22M/41.5M [00:51&lt;12:53, 51.9kB/s]
+  8%|7         | 3.23M/41.5M [00:52&lt;17:01, 39.3kB/s]
+  8%|7         | 3.25M/41.5M [00:52&lt;10:35, 63.1kB/s]
+  8%|7         | 3.26M/41.5M [00:52&lt;11:21, 58.9kB/s]
+  8%|7         | 3.27M/41.5M [00:52&lt;12:27, 53.6kB/s]
+  8%|7         | 3.29M/41.5M [00:53&lt;10:39, 62.7kB/s]
+  8%|7         | 3.30M/41.5M [00:53&lt;11:23, 58.6kB/s]
+  8%|7         | 3.30M/41.5M [00:53&lt;12:02, 55.4kB/s]
+  8%|8         | 3.32M/41.5M [00:53&lt;11:34, 57.6kB/s]
+  8%|8         | 3.34M/41.5M [00:53&lt;10:00, 66.6kB/s]
+  8%|8         | 3.34M/41.5M [00:54&lt;10:53, 61.2kB/s]
+  8%|8         | 3.35M/41.5M [00:54&lt;11:40, 57.1kB/s]
+  8%|8         | 3.37M/41.5M [00:54&lt;09:54, 67.2kB/s]
+  8%|8         | 3.38M/41.5M [00:54&lt;08:55, 74.6kB/s]
+  8%|8         | 3.40M/41.5M [00:54&lt;08:20, 79.8kB/s]
+  8%|8         | 3.41M/41.5M [00:54&lt;09:30, 70.0kB/s]
+  8%|8         | 3.42M/41.5M [00:55&lt;08:40, 76.8kB/s]
+  8%|8         | 3.44M/41.5M [00:55&lt;08:09, 81.5kB/s]
+  8%|8         | 3.45M/41.5M [00:55&lt;07:49, 84.9kB/s]
+  8%|8         | 3.47M/41.5M [00:55&lt;07:37, 87.2kB/s]
+  8%|8         | 3.48M/41.5M [00:55&lt;08:34, 77.4kB/s]
+  8%|8         | 3.49M/41.5M [00:56&lt;09:00, 73.8kB/s]
+  8%|8         | 3.52M/41.5M [00:56&lt;07:38, 86.9kB/s]
+  9%|8         | 3.53M/41.5M [00:56&lt;07:29, 88.5kB/s]
+  9%|8         | 3.55M/41.5M [00:56&lt;09:30, 69.7kB/s]
+  9%|8         | 3.57M/41.5M [00:56&lt;07:13, 91.8kB/s]
+  9%|8         | 3.59M/41.5M [00:57&lt;07:39, 86.5kB/s]
+  9%|8         | 3.60M/41.5M [00:57&lt;07:30, 88.2kB/s]
+  9%|8         | 3.62M/41.5M [00:57&lt;08:28, 78.1kB/s]
+  9%|8         | 3.63M/41.5M [00:57&lt;08:04, 81.9kB/s]
+  9%|8         | 3.65M/41.5M [00:57&lt;07:48, 84.7kB/s]
+  9%|8         | 3.66M/41.5M [00:58&lt;07:36, 87.0kB/s]
+  9%|8         | 3.68M/41.5M [00:58&lt;09:34, 69.1kB/s]
+  9%|8         | 3.70M/41.5M [00:58&lt;08:50, 74.7kB/s]
+  9%|8         | 3.70M/41.5M [00:58&lt;09:46, 67.5kB/s]
+  9%|8         | 3.72M/41.5M [00:59&lt;09:27, 69.8kB/s]
+  9%|8         | 3.73M/41.5M [00:59&lt;09:43, 67.9kB/s]
+  9%|9         | 3.73M/41.5M [00:59&lt;13:45, 48.0kB/s]
+  9%|9         | 3.76M/41.5M [00:59&lt;10:32, 62.6kB/s]
+  9%|9         | 3.77M/41.5M [00:59&lt;09:27, 69.7kB/s]
+  9%|9         | 3.79M/41.5M [01:00&lt;09:18, 70.7kB/s]
+  9%|9         | 3.80M/41.5M [01:00&lt;12:05, 54.5kB/s]
+  9%|9         | 3.82M/41.5M [01:00&lt;09:24, 69.9kB/s]
+  9%|9         | 3.83M/41.5M [01:00&lt;10:14, 64.3kB/s]
+  9%|9         | 3.84M/41.5M [01:01&lt;09:11, 71.5kB/s]
+  9%|9         | 3.85M/41.5M [01:01&lt;10:08, 64.8kB/s]
+  9%|9         | 3.87M/41.5M [01:01&lt;11:33, 56.8kB/s]
+  9%|9         | 3.89M/41.5M [01:01&lt;08:37, 76.2kB/s]
+  9%|9         | 3.91M/41.5M [01:01&lt;08:39, 75.8kB/s]
+  9%|9         | 3.91M/41.5M [01:02&lt;09:01, 72.8kB/s]
+  9%|9         | 3.92M/41.5M [01:02&lt;10:01, 65.4kB/s]
+  9%|9         | 3.94M/41.5M [01:02&lt;08:58, 73.2kB/s]
+ 10%|9         | 3.95M/41.5M [01:02&lt;08:19, 78.8kB/s]
+ 10%|9         | 3.97M/41.5M [01:02&lt;08:27, 77.5kB/s]
+ 10%|9         | 3.98M/41.5M [01:03&lt;08:00, 81.8kB/s]
+ 10%|9         | 4.00M/41.5M [01:03&lt;07:42, 84.9kB/s]
+ 10%|9         | 4.02M/41.5M [01:03&lt;07:30, 87.2kB/s]
+ 10%|9         | 4.03M/41.5M [01:03&lt;07:22, 88.8kB/s]
+ 10%|9         | 4.05M/41.5M [01:03&lt;06:45, 96.8kB/s]
+ 10%|9         | 4.06M/41.5M [01:03&lt;06:50, 95.5kB/s]
+ 10%|9         | 4.08M/41.5M [01:04&lt;06:54, 94.7kB/s]
+ 10%|9         | 4.10M/41.5M [01:04&lt;06:02, 108kB/s]
+ 10%|9         | 4.12M/41.5M [01:04&lt;06:18, 104kB/s]
+ 10%|9         | 4.13M/41.5M [01:04&lt;06:30, 100kB/s]
+ 10%|#         | 4.16M/41.5M [01:04&lt;05:49, 112kB/s]
+ 10%|#         | 4.18M/41.5M [01:04&lt;05:25, 120kB/s]
+ 10%|#         | 4.20M/41.5M [01:05&lt;05:10, 126kB/s]
+ 10%|#         | 4.23M/41.5M [01:05&lt;05:00, 130kB/s]
+ 10%|#         | 4.24M/41.5M [01:05&lt;05:52, 111kB/s]
+ 10%|#         | 4.27M/41.5M [01:05&lt;05:05, 128kB/s]
+ 10%|#         | 4.29M/41.5M [01:05&lt;05:19, 122kB/s]
+ 10%|#         | 4.30M/41.5M [01:06&lt;05:42, 114kB/s]
+ 10%|#         | 4.33M/41.5M [01:06&lt;05:21, 121kB/s]
+ 10%|#         | 4.34M/41.5M [01:06&lt;05:21, 121kB/s]
+ 11%|#         | 4.37M/41.5M [01:06&lt;05:06, 127kB/s]
+ 11%|#         | 4.38M/41.5M [01:06&lt;05:59, 108kB/s]
+ 11%|#         | 4.41M/41.5M [01:06&lt;05:09, 126kB/s]
+ 11%|#         | 4.43M/41.5M [01:07&lt;05:20, 121kB/s]
+ 11%|#         | 4.45M/41.5M [01:07&lt;05:43, 113kB/s]
+ 11%|#         | 4.47M/41.5M [01:07&lt;05:21, 121kB/s]
+ 11%|#         | 4.48M/41.5M [01:07&lt;06:17, 103kB/s]
+ 11%|#         | 4.50M/41.5M [01:07&lt;05:54, 109kB/s]
+ 11%|#         | 4.52M/41.5M [01:07&lt;05:27, 118kB/s]
+ 11%|#         | 4.54M/41.5M [01:08&lt;05:50, 111kB/s]
+ 11%|#         | 4.55M/41.5M [01:08&lt;06:07, 105kB/s]
+ 11%|#1        | 4.57M/41.5M [01:08&lt;06:56, 92.9kB/s]
+ 11%|#1        | 4.59M/41.5M [01:08&lt;06:49, 94.6kB/s]
+ 11%|#1        | 4.60M/41.5M [01:08&lt;06:51, 94.1kB/s]
+ 11%|#1        | 4.62M/41.5M [01:09&lt;07:00, 91.9kB/s]
+ 11%|#1        | 4.63M/41.5M [01:09&lt;06:59, 92.2kB/s]
+ 11%|#1        | 4.65M/41.5M [01:09&lt;06:50, 94.2kB/s]
+ 11%|#1        | 4.66M/41.5M [01:09&lt;06:51, 93.7kB/s]
+ 11%|#1        | 4.68M/41.5M [01:09&lt;06:53, 93.4kB/s]
+ 11%|#1        | 4.70M/41.5M [01:09&lt;06:01, 107kB/s]
+ 11%|#1        | 4.71M/41.5M [01:10&lt;05:46, 111kB/s]
+ 11%|#1        | 4.73M/41.5M [01:10&lt;05:34, 115kB/s]
+ 11%|#1        | 4.74M/41.5M [01:10&lt;05:58, 107kB/s]
+ 11%|#1        | 4.76M/41.5M [01:10&lt;06:07, 105kB/s]
+ 12%|#1        | 4.77M/41.5M [01:10&lt;06:21, 101kB/s]
+ 12%|#1        | 4.79M/41.5M [01:10&lt;05:39, 113kB/s]
+ 12%|#1        | 4.80M/41.5M [01:10&lt;05:33, 115kB/s]
+ 12%|#1        | 4.82M/41.5M [01:11&lt;05:13, 123kB/s]
+ 12%|#1        | 4.84M/41.5M [01:11&lt;05:43, 112kB/s]
+ 12%|#1        | 4.85M/41.5M [01:11&lt;05:12, 123kB/s]
+ 12%|#1        | 4.87M/41.5M [01:11&lt;06:35, 97.2kB/s]
+ 12%|#1        | 4.89M/41.5M [01:11&lt;05:48, 110kB/s]
+ 12%|#1        | 4.91M/41.5M [01:11&lt;06:05, 105kB/s]
+ 12%|#1        | 4.92M/41.5M [01:12&lt;06:18, 101kB/s]
+ 12%|#1        | 4.95M/41.5M [01:12&lt;05:40, 113kB/s]
+ 12%|#1        | 4.97M/41.5M [01:12&lt;05:17, 121kB/s]
+ 12%|#2        | 4.98M/41.5M [01:12&lt;05:40, 112kB/s]
+ 12%|#2        | 5.00M/41.5M [01:12&lt;07:48, 81.7kB/s]
+ 12%|#2        | 5.02M/41.5M [01:13&lt;06:38, 96.0kB/s]
+ 12%|#2        | 5.05M/41.5M [01:13&lt;05:19, 119kB/s]
+ 12%|#2        | 5.07M/41.5M [01:13&lt;05:40, 112kB/s]
+ 12%|#2        | 5.09M/41.5M [01:13&lt;05:57, 107kB/s]
+ 12%|#2        | 5.11M/41.5M [01:13&lt;05:28, 116kB/s]
+ 12%|#2        | 5.12M/41.5M [01:14&lt;06:26, 98.7kB/s]
+ 12%|#2        | 5.15M/41.5M [01:14&lt;05:47, 110kB/s]
+ 12%|#2        | 5.16M/41.5M [01:14&lt;06:02, 105kB/s]
+ 12%|#2        | 5.18M/41.5M [01:14&lt;05:36, 113kB/s]
+ 13%|#2        | 5.20M/41.5M [01:14&lt;05:56, 107kB/s]
+ 13%|#2        | 5.21M/41.5M [01:14&lt;06:11, 102kB/s]
+ 13%|#2        | 5.23M/41.5M [01:15&lt;06:22, 99.5kB/s]
+ 13%|#2        | 5.24M/41.5M [01:15&lt;06:29, 97.5kB/s]
+ 13%|#2        | 5.26M/41.5M [01:15&lt;06:35, 96.0kB/s]
+ 13%|#2        | 5.27M/41.5M [01:15&lt;05:50, 108kB/s]
+ 13%|#2        | 5.29M/41.5M [01:15&lt;05:39, 112kB/s]
+ 13%|#2        | 5.30M/41.5M [01:15&lt;05:47, 109kB/s]
+ 13%|#2        | 5.32M/41.5M [01:15&lt;05:34, 113kB/s]
+ 13%|#2        | 5.34M/41.5M [01:16&lt;05:06, 124kB/s]
+ 13%|#2        | 5.35M/41.5M [01:16&lt;04:56, 128kB/s]
+ 13%|#2        | 5.37M/41.5M [01:16&lt;05:29, 115kB/s]
+ 13%|#2        | 5.38M/41.5M [01:16&lt;07:23, 85.3kB/s]
+ 13%|#3        | 5.42M/41.5M [01:16&lt;04:58, 127kB/s]
+ 13%|#3        | 5.44M/41.5M [01:16&lt;04:45, 132kB/s]
+ 13%|#3        | 5.45M/41.5M [01:17&lt;05:15, 120kB/s]
+ 13%|#3        | 5.47M/41.5M [01:17&lt;05:03, 124kB/s]
+ 13%|#3        | 5.48M/41.5M [01:17&lt;07:59, 78.7kB/s]
+ 13%|#3        | 5.52M/41.5M [01:17&lt;04:56, 127kB/s]
+ 13%|#3        | 5.55M/41.5M [01:18&lt;05:15, 119kB/s]
+ 13%|#3        | 5.56M/41.5M [01:18&lt;06:34, 95.4kB/s]
+ 13%|#3        | 5.59M/41.5M [01:18&lt;06:24, 97.9kB/s]
+ 14%|#3        | 5.60M/41.5M [01:18&lt;06:29, 96.6kB/s]
+ 14%|#3        | 5.62M/41.5M [01:18&lt;06:33, 95.6kB/s]
+ 14%|#3        | 5.63M/41.5M [01:19&lt;06:36, 94.9kB/s]
+ 14%|#3        | 5.65M/41.5M [01:19&lt;05:58, 105kB/s]
+ 14%|#3        | 5.66M/41.5M [01:19&lt;05:44, 109kB/s]
+ 14%|#3        | 5.68M/41.5M [01:19&lt;06:01, 104kB/s]
+ 14%|#3        | 5.70M/41.5M [01:19&lt;05:55, 106kB/s]
+ 14%|#3        | 5.71M/41.5M [01:19&lt;05:40, 110kB/s]
+ 14%|#3        | 5.73M/41.5M [01:19&lt;05:59, 104kB/s]
+ 14%|#3        | 5.74M/41.5M [01:20&lt;06:12, 101kB/s]
+ 14%|#3        | 5.76M/41.5M [01:20&lt;05:38, 111kB/s]
+ 14%|#3        | 5.77M/41.5M [01:20&lt;05:29, 114kB/s]
+ 14%|#3        | 5.79M/41.5M [01:20&lt;05:51, 107kB/s]
+ 14%|#3        | 5.80M/41.5M [01:20&lt;05:18, 117kB/s]
+ 14%|#4        | 5.82M/41.5M [01:20&lt;05:27, 114kB/s]
+ 14%|#4        | 5.84M/41.5M [01:20&lt;05:21, 116kB/s]
+ 14%|#4        | 5.85M/41.5M [01:21&lt;04:58, 125kB/s]
+ 14%|#4        | 5.87M/41.5M [01:21&lt;05:01, 124kB/s]
+ 14%|#4        | 5.88M/41.5M [01:21&lt;04:47, 130kB/s]
+ 14%|#4        | 5.90M/41.5M [01:21&lt;04:53, 127kB/s]
+ 14%|#4        | 5.91M/41.5M [01:21&lt;07:27, 83.4kB/s]
+ 14%|#4        | 5.95M/41.5M [01:21&lt;04:25, 140kB/s]
+ 14%|#4        | 5.98M/41.5M [01:22&lt;04:45, 130kB/s]
+ 14%|#4        | 6.00M/41.5M [01:22&lt;04:53, 127kB/s]
+ 14%|#4        | 6.02M/41.5M [01:22&lt;04:41, 132kB/s]
+ 15%|#4        | 6.03M/41.5M [01:22&lt;05:10, 120kB/s]
+ 15%|#4        | 6.05M/41.5M [01:22&lt;05:58, 104kB/s]
+ 15%|#4        | 6.06M/41.5M [01:23&lt;06:09, 101kB/s]
+ 15%|#4        | 6.09M/41.5M [01:23&lt;07:09, 86.5kB/s]
+ 15%|#4        | 6.11M/41.5M [01:23&lt;05:52, 105kB/s]
+ 15%|#4        | 6.12M/41.5M [01:23&lt;06:03, 102kB/s]
+ 15%|#4        | 6.14M/41.5M [01:23&lt;06:13, 99.3kB/s]
+ 15%|#4        | 6.16M/41.5M [01:24&lt;06:20, 97.3kB/s]
+ 15%|#4        | 6.17M/41.5M [01:24&lt;10:08, 60.8kB/s]
+ 15%|#4        | 6.20M/41.5M [01:24&lt;07:06, 86.7kB/s]
+ 15%|#4        | 6.22M/41.5M [01:24&lt;07:44, 79.7kB/s]
+ 15%|#5        | 6.23M/41.5M [01:25&lt;07:27, 82.6kB/s]
+ 15%|#5        | 6.25M/41.5M [01:25&lt;07:14, 85.1kB/s]
+ 15%|#5        | 6.27M/41.5M [01:25&lt;07:04, 87.1kB/s]
+ 15%|#5        | 6.28M/41.5M [01:25&lt;06:56, 88.6kB/s]
+ 15%|#5        | 6.30M/41.5M [01:25&lt;06:50, 89.8kB/s]
+ 15%|#5        | 6.31M/41.5M [01:26&lt;06:46, 90.6kB/s]
+ 15%|#5        | 6.33M/41.5M [01:26&lt;07:47, 78.8kB/s]
+ 15%|#5        | 6.35M/41.5M [01:26&lt;07:16, 84.4kB/s]
+ 15%|#5        | 6.37M/41.5M [01:26&lt;07:05, 86.5kB/s]
+ 15%|#5        | 6.38M/41.5M [01:26&lt;06:57, 88.2kB/s]
+ 15%|#5        | 6.40M/41.5M [01:27&lt;06:51, 89.4kB/s]
+ 15%|#5        | 6.41M/41.5M [01:27&lt;06:46, 90.4kB/s]
+ 15%|#5        | 6.43M/41.5M [01:27&lt;06:43, 91.1kB/s]
+ 16%|#5        | 6.45M/41.5M [01:27&lt;06:41, 91.6kB/s]
+ 16%|#5        | 6.46M/41.5M [01:27&lt;08:36, 71.1kB/s]
+ 16%|#5        | 6.49M/41.5M [01:28&lt;06:10, 99.1kB/s]
+ 16%|#5        | 6.51M/41.5M [01:28&lt;06:17, 97.3kB/s]
+ 16%|#5        | 6.52M/41.5M [01:28&lt;06:21, 96.1kB/s]
+ 16%|#5        | 6.54M/41.5M [01:28&lt;06:24, 95.3kB/s]
+ 16%|#5        | 6.55M/41.5M [01:28&lt;06:27, 94.6kB/s]
+ 16%|#5        | 6.57M/41.5M [01:29&lt;06:29, 94.1kB/s]
+ 16%|#5        | 6.59M/41.5M [01:29&lt;06:30, 93.7kB/s]
+ 16%|#5        | 6.60M/41.5M [01:29&lt;06:31, 93.4kB/s]
+ 16%|#5        | 6.62M/41.5M [01:29&lt;06:32, 93.2kB/s]
+ 16%|#5        | 6.63M/41.5M [01:29&lt;06:32, 93.1kB/s]
+ 16%|#6        | 6.65M/41.5M [01:30&lt;08:30, 71.6kB/s]
+ 16%|#6        | 6.68M/41.5M [01:30&lt;06:05, 99.8kB/s]
+ 16%|#6        | 6.70M/41.5M [01:30&lt;06:12, 97.9kB/s]
+ 16%|#6        | 6.71M/41.5M [01:30&lt;08:01, 75.8kB/s]
+ 16%|#6        | 6.74M/41.5M [01:31&lt;05:59, 101kB/s]
+ 16%|#6        | 6.76M/41.5M [01:31&lt;06:06, 99.3kB/s]
+ 16%|#6        | 6.77M/41.5M [01:31&lt;06:13, 97.5kB/s]
+ 16%|#6        | 6.79M/41.5M [01:31&lt;06:42, 90.4kB/s]
+ 16%|#6        | 6.80M/41.5M [01:31&lt;07:09, 84.6kB/s]
+ 16%|#6        | 6.82M/41.5M [01:31&lt;06:27, 93.7kB/s]
+ 16%|#6        | 6.84M/41.5M [01:32&lt;06:28, 93.4kB/s]
+ 17%|#6        | 6.85M/41.5M [01:32&lt;08:23, 72.1kB/s]
+ 17%|#6        | 6.88M/41.5M [01:32&lt;06:26, 93.8kB/s]
+ 17%|#6        | 6.89M/41.5M [01:32&lt;07:22, 82.1kB/s]
+ 17%|#6        | 6.91M/41.5M [01:33&lt;07:07, 84.8kB/s]
+ 17%|#6        | 6.93M/41.5M [01:33&lt;06:51, 88.0kB/s]
+ 17%|#6        | 6.95M/41.5M [01:33&lt;06:46, 89.2kB/s]
+ 17%|#6        | 6.96M/41.5M [01:33&lt;06:41, 90.1kB/s]
+ 17%|#6        | 6.98M/41.5M [01:33&lt;06:38, 90.9kB/s]
+ 17%|#6        | 6.99M/41.5M [01:34&lt;06:35, 91.4kB/s]
+ 17%|#6        | 7.02M/41.5M [01:34&lt;07:22, 81.6kB/s]
+ 17%|#6        | 7.05M/41.5M [01:34&lt;05:38, 107kB/s]
+ 17%|#7        | 7.06M/41.5M [01:34&lt;05:50, 103kB/s]
+ 17%|#7        | 7.08M/41.5M [01:34&lt;05:59, 100kB/s]
+ 17%|#7        | 7.09M/41.5M [01:35&lt;06:07, 98.2kB/s]
+ 17%|#7        | 7.11M/41.5M [01:35&lt;07:59, 75.2kB/s]
+ 17%|#7        | 7.13M/41.5M [01:35&lt;06:37, 90.6kB/s]
+ 17%|#7        | 7.15M/41.5M [01:35&lt;06:35, 91.1kB/s]
+ 17%|#7        | 7.16M/41.5M [01:35&lt;06:33, 91.5kB/s]
+ 17%|#7        | 7.18M/41.5M [01:36&lt;06:31, 91.9kB/s]
+ 17%|#7        | 7.20M/41.5M [01:36&lt;06:30, 92.1kB/s]
+ 17%|#7        | 7.21M/41.5M [01:36&lt;06:29, 92.3kB/s]
+ 17%|#7        | 7.23M/41.5M [01:36&lt;06:28, 92.5kB/s]
+ 17%|#7        | 7.24M/41.5M [01:36&lt;06:04, 98.4kB/s]
+ 18%|#7        | 7.27M/41.5M [01:37&lt;05:43, 104kB/s]
+ 18%|#7        | 7.28M/41.5M [01:37&lt;05:55, 101kB/s]
+ 18%|#7        | 7.30M/41.5M [01:37&lt;07:51, 76.1kB/s]
+ 18%|#7        | 7.33M/41.5M [01:37&lt;05:31, 108kB/s]
+ 18%|#7        | 7.34M/41.5M [01:37&lt;06:03, 98.6kB/s]
+ 18%|#7        | 7.36M/41.5M [01:38&lt;07:27, 80.0kB/s]
+ 18%|#7        | 7.38M/41.5M [01:38&lt;07:31, 79.2kB/s]
+ 18%|#7        | 7.39M/41.5M [01:38&lt;08:59, 66.2kB/s]
+ 18%|#7        | 7.41M/41.5M [01:38&lt;08:15, 72.1kB/s]
+ 18%|#7        | 7.41M/41.5M [01:39&lt;11:12, 53.1kB/s]
+ 18%|#7        | 7.44M/41.5M [01:39&lt;08:17, 71.7kB/s]
+ 18%|#7        | 7.45M/41.5M [01:39&lt;09:35, 62.1kB/s]
+ 18%|#7        | 7.46M/41.5M [01:40&lt;10:07, 58.7kB/s]
+ 18%|#8        | 7.48M/41.5M [01:40&lt;09:25, 63.1kB/s]
+ 18%|#8        | 7.48M/41.5M [01:40&lt;09:28, 62.7kB/s]
+ 18%|#8        | 7.49M/41.5M [01:40&lt;10:10, 58.4kB/s]
+ 18%|#8        | 7.51M/41.5M [01:40&lt;08:44, 67.9kB/s]
+ 18%|#8        | 7.52M/41.5M [01:40&lt;09:36, 61.8kB/s]
+ 18%|#8        | 7.53M/41.5M [01:41&lt;08:23, 70.7kB/s]
+ 18%|#8        | 7.55M/41.5M [01:41&lt;07:41, 77.1kB/s]
+ 18%|#8        | 7.56M/41.5M [01:41&lt;07:15, 81.7kB/s]
+ 18%|#8        | 7.58M/41.5M [01:41&lt;07:27, 79.4kB/s]
+ 18%|#8        | 7.59M/41.5M [01:41&lt;07:53, 75.1kB/s]
+ 18%|#8        | 7.59M/41.5M [01:42&lt;09:34, 61.9kB/s]
+ 18%|#8        | 7.62M/41.5M [01:42&lt;06:34, 90.1kB/s]
+ 18%|#8        | 7.63M/41.5M [01:42&lt;06:30, 90.9kB/s]
+ 18%|#8        | 7.65M/41.5M [01:42&lt;08:25, 70.2kB/s]
+ 18%|#8        | 7.67M/41.5M [01:42&lt;06:45, 87.4kB/s]
+ 19%|#8        | 7.69M/41.5M [01:43&lt;08:24, 70.3kB/s]
+ 19%|#8        | 7.70M/41.5M [01:43&lt;07:49, 75.5kB/s]
+ 19%|#8        | 7.72M/41.5M [01:43&lt;09:13, 64.0kB/s]
+ 19%|#8        | 7.73M/41.5M [01:43&lt;08:23, 70.3kB/s]
+ 19%|#8        | 7.74M/41.5M [01:44&lt;09:07, 64.6kB/s]
+ 19%|#8        | 7.76M/41.5M [01:44&lt;14:48, 39.8kB/s]
+ 19%|#8        | 7.79M/41.5M [01:44&lt;08:57, 65.7kB/s]
+ 19%|#8        | 7.80M/41.5M [01:45&lt;08:17, 71.1kB/s]
+ 19%|#8        | 7.82M/41.5M [01:45&lt;11:10, 52.7kB/s]
+ 19%|#8        | 7.84M/41.5M [01:45&lt;08:37, 68.2kB/s]
+ 19%|#8        | 7.86M/41.5M [01:46&lt;09:40, 60.8kB/s]
+ 19%|#8        | 7.88M/41.5M [01:46&lt;08:45, 67.0kB/s]
+ 19%|#9        | 7.89M/41.5M [01:46&lt;08:05, 72.6kB/s]
+ 19%|#9        | 7.91M/41.5M [01:46&lt;07:59, 73.4kB/s]
+ 19%|#9        | 7.92M/41.5M [01:47&lt;08:54, 65.8kB/s]
+ 19%|#9        | 7.94M/41.5M [01:47&lt;08:09, 71.9kB/s]
+ 19%|#9        | 7.95M/41.5M [01:47&lt;07:36, 77.0kB/s]
+ 19%|#9        | 7.97M/41.5M [01:47&lt;07:13, 81.1kB/s]
+ 19%|#9        | 7.98M/41.5M [01:47&lt;08:50, 66.2kB/s]
+ 19%|#9        | 8.00M/41.5M [01:48&lt;08:04, 72.4kB/s]
+ 19%|#9        | 8.01M/41.5M [01:48&lt;08:52, 65.9kB/s]
+ 19%|#9        | 8.02M/41.5M [01:48&lt;08:01, 72.9kB/s]
+ 19%|#9        | 8.04M/41.5M [01:48&lt;07:27, 78.4kB/s]
+ 19%|#9        | 8.05M/41.5M [01:49&lt;09:04, 64.3kB/s]
+ 19%|#9        | 8.08M/41.5M [01:49&lt;07:30, 77.8kB/s]
+ 20%|#9        | 8.09M/41.5M [01:49&lt;08:30, 68.6kB/s]
+ 20%|#9        | 8.11M/41.5M [01:49&lt;10:31, 55.4kB/s]
+ 20%|#9        | 8.14M/41.5M [01:50&lt;07:56, 73.4kB/s]
+ 20%|#9        | 8.16M/41.5M [01:50&lt;07:54, 73.7kB/s]
+ 20%|#9        | 8.16M/41.5M [01:50&lt;08:08, 71.5kB/s]
+ 20%|#9        | 8.18M/41.5M [01:50&lt;07:35, 76.7kB/s]
+ 20%|#9        | 8.20M/41.5M [01:51&lt;07:38, 76.2kB/s]
+ 20%|#9        | 8.20M/41.5M [01:51&lt;11:14, 51.7kB/s]
+ 20%|#9        | 8.23M/41.5M [01:52&lt;10:53, 53.4kB/s]
+ 20%|#9        | 8.26M/41.5M [01:52&lt;08:32, 68.0kB/s]
+ 20%|#9        | 8.27M/41.5M [01:52&lt;08:12, 70.7kB/s]
+ 20%|#9        | 8.29M/41.5M [01:52&lt;09:20, 62.1kB/s]
+ 20%|#9        | 8.30M/41.5M [01:52&lt;09:50, 59.0kB/s]
+ 20%|##        | 8.30M/41.5M [01:53&lt;09:57, 58.2kB/s]
+ 20%|##        | 8.32M/41.5M [01:53&lt;09:01, 64.2kB/s]
+ 20%|##        | 8.33M/41.5M [01:53&lt;09:18, 62.3kB/s]
+ 20%|##        | 8.34M/41.5M [01:54&lt;17:56, 32.3kB/s]
+ 20%|##        | 8.39M/41.5M [01:54&lt;08:07, 71.2kB/s]
+ 20%|##        | 8.41M/41.5M [01:54&lt;07:41, 75.1kB/s]
+ 20%|##        | 8.42M/41.5M [01:55&lt;08:51, 65.3kB/s]
+ 20%|##        | 8.44M/41.5M [01:55&lt;08:10, 70.7kB/s]
+ 20%|##        | 8.45M/41.5M [01:55&lt;07:38, 75.5kB/s]
+ 20%|##        | 8.47M/41.5M [01:55&lt;07:14, 79.6kB/s]
+ 20%|##        | 8.48M/41.5M [01:55&lt;06:57, 82.9kB/s]
+ 20%|##        | 8.50M/41.5M [01:55&lt;06:44, 85.6kB/s]
+ 21%|##        | 8.52M/41.5M [01:56&lt;06:34, 87.5kB/s]
+ 21%|##        | 8.53M/41.5M [01:56&lt;06:28, 89.0kB/s]
+ 21%|##        | 8.55M/41.5M [01:56&lt;06:23, 90.1kB/s]
+ 21%|##        | 8.56M/41.5M [01:56&lt;06:19, 90.9kB/s]
+ 21%|##        | 8.58M/41.5M [01:56&lt;06:17, 91.4kB/s]
+ 21%|##        | 8.59M/41.5M [01:56&lt;06:15, 91.8kB/s]
+ 21%|##        | 8.61M/41.5M [01:57&lt;06:14, 92.1kB/s]
+ 21%|##        | 8.62M/41.5M [01:57&lt;08:05, 71.0kB/s]
+ 21%|##        | 8.65M/41.5M [01:57&lt;06:32, 87.8kB/s]
+ 21%|##        | 8.66M/41.5M [01:57&lt;06:26, 89.1kB/s]
+ 21%|##        | 8.68M/41.5M [01:58&lt;06:21, 90.1kB/s]
+ 21%|##        | 8.70M/41.5M [01:58&lt;05:31, 104kB/s]
+ 21%|##1       | 8.72M/41.5M [01:58&lt;07:42, 74.3kB/s]
+ 21%|##1       | 8.76M/41.5M [01:58&lt;04:52, 117kB/s]
+ 21%|##1       | 8.77M/41.5M [01:58&lt;05:09, 111kB/s]
+ 21%|##1       | 8.79M/41.5M [01:59&lt;05:23, 106kB/s]
+ 21%|##1       | 8.80M/41.5M [01:59&lt;05:35, 102kB/s]
+ 21%|##1       | 8.82M/41.5M [01:59&lt;05:43, 99.6kB/s]
+ 21%|##1       | 8.84M/41.5M [01:59&lt;05:50, 97.6kB/s]
+ 21%|##1       | 8.86M/41.5M [01:59&lt;05:11, 110kB/s]
+ 21%|##1       | 8.88M/41.5M [01:59&lt;05:26, 105kB/s]
+ 21%|##1       | 8.90M/41.5M [02:00&lt;04:57, 115kB/s]
+ 21%|##1       | 8.91M/41.5M [02:00&lt;08:23, 67.8kB/s]
+ 22%|##1       | 8.97M/41.5M [02:00&lt;05:07, 111kB/s]
+ 22%|##1       | 8.98M/41.5M [02:01&lt;06:27, 88.0kB/s]
+ 22%|##1       | 9.01M/41.5M [02:01&lt;05:46, 98.3kB/s]
+ 22%|##1       | 9.02M/41.5M [02:01&lt;05:50, 97.1kB/s]
+ 22%|##1       | 9.04M/41.5M [02:01&lt;05:54, 96.0kB/s]
+ 22%|##1       | 9.05M/41.5M [02:02&lt;05:57, 95.2kB/s]
+ 22%|##1       | 9.07M/41.5M [02:02&lt;05:59, 94.5kB/s]
+ 22%|##1       | 9.09M/41.5M [02:02&lt;06:01, 94.0kB/s]
+ 22%|##1       | 9.10M/41.5M [02:02&lt;06:02, 93.7kB/s]
+ 22%|##1       | 9.12M/41.5M [02:02&lt;06:03, 93.4kB/s]
+ 22%|##2       | 9.13M/41.5M [02:02&lt;06:03, 93.2kB/s]
+ 22%|##2       | 9.15M/41.5M [02:03&lt;06:04, 93.1kB/s]
+ 22%|##2       | 9.16M/41.5M [02:03&lt;06:04, 93.0kB/s]
+ 22%|##2       | 9.18M/41.5M [02:03&lt;05:42, 99.0kB/s]
+ 22%|##2       | 9.20M/41.5M [02:03&lt;05:48, 97.0kB/s]
+ 22%|##2       | 9.21M/41.5M [02:03&lt;08:05, 69.7kB/s]
+ 22%|##2       | 9.25M/41.5M [02:04&lt;04:55, 115kB/s]
+ 22%|##2       | 9.27M/41.5M [02:04&lt;05:10, 109kB/s]
+ 22%|##2       | 9.29M/41.5M [02:04&lt;04:48, 117kB/s]
+ 22%|##2       | 9.30M/41.5M [02:04&lt;05:06, 110kB/s]
+ 22%|##2       | 9.33M/41.5M [02:04&lt;04:44, 119kB/s]
+ 23%|##2       | 9.34M/41.5M [02:04&lt;05:03, 111kB/s]
+ 23%|##2       | 9.37M/41.5M [02:05&lt;04:42, 119kB/s]
+ 23%|##2       | 9.39M/41.5M [02:05&lt;04:29, 125kB/s]
+ 23%|##2       | 9.41M/41.5M [02:05&lt;04:51, 116kB/s]
+ 23%|##2       | 9.43M/41.5M [02:05&lt;04:34, 123kB/s]
+ 23%|##2       | 9.45M/41.5M [02:05&lt;04:23, 128kB/s]
+ 23%|##2       | 9.47M/41.5M [02:06&lt;06:51, 81.7kB/s]
+ 23%|##2       | 9.52M/41.5M [02:06&lt;04:09, 134kB/s]
+ 23%|##2       | 9.54M/41.5M [02:06&lt;04:57, 112kB/s]
+ 23%|##3       | 9.55M/41.5M [02:06&lt;05:10, 108kB/s]
+ 23%|##3       | 9.58M/41.5M [02:07&lt;07:14, 77.0kB/s]
+ 23%|##3       | 9.63M/41.5M [02:07&lt;04:38, 120kB/s]
+ 23%|##3       | 9.65M/41.5M [02:07&lt;04:39, 119kB/s]
+ 23%|##3       | 9.66M/41.5M [02:07&lt;04:55, 113kB/s]
+ 23%|##3       | 9.68M/41.5M [02:08&lt;05:09, 108kB/s]
+ 23%|##3       | 9.70M/41.5M [02:08&lt;05:21, 104kB/s]
+ 23%|##3       | 9.72M/41.5M [02:08&lt;05:09, 108kB/s]
+ 23%|##3       | 9.73M/41.5M [02:08&lt;05:21, 104kB/s]
+ 23%|##3       | 9.75M/41.5M [02:08&lt;05:12, 107kB/s]
+ 24%|##3       | 9.77M/41.5M [02:09&lt;05:03, 110kB/s]
+ 24%|##3       | 9.79M/41.5M [02:09&lt;04:58, 111kB/s]
+ 24%|##3       | 9.80M/41.5M [02:09&lt;07:09, 77.4kB/s]
+ 24%|##3       | 9.84M/41.5M [02:09&lt;05:05, 108kB/s]
+ 24%|##3       | 9.85M/41.5M [02:09&lt;05:18, 104kB/s]
+ 24%|##3       | 9.87M/41.5M [02:10&lt;05:27, 101kB/s]
+ 24%|##3       | 9.89M/41.5M [02:10&lt;04:56, 112kB/s]
+ 24%|##3       | 9.91M/41.5M [02:10&lt;05:11, 106kB/s]
+ 24%|##3       | 9.92M/41.5M [02:10&lt;06:57, 79.2kB/s]
+ 24%|##4       | 9.96M/41.5M [02:11&lt;04:57, 111kB/s]
+ 24%|##4       | 9.98M/41.5M [02:11&lt;05:10, 107kB/s]
+ 24%|##4       | 10.0M/41.5M [02:11&lt;04:46, 115kB/s]
+ 24%|##4       | 10.0M/41.5M [02:11&lt;06:25, 85.5kB/s]
+ 24%|##4       | 10.0M/41.5M [02:11&lt;05:02, 109kB/s]
+ 24%|##4       | 10.1M/41.5M [02:12&lt;05:14, 105kB/s]
+ 24%|##4       | 10.1M/41.5M [02:12&lt;05:23, 102kB/s]
+ 24%|##4       | 10.1M/41.5M [02:12&lt;08:33, 64.1kB/s]
+ 24%|##4       | 10.1M/41.5M [02:12&lt;05:35, 97.9kB/s]
+ 24%|##4       | 10.1M/41.5M [02:13&lt;06:56, 79.0kB/s]
+ 24%|##4       | 10.2M/41.5M [02:13&lt;06:40, 81.9kB/s]
+ 25%|##4       | 10.2M/41.5M [02:13&lt;06:29, 84.4kB/s]
+ 25%|##4       | 10.2M/41.5M [02:14&lt;07:52, 69.4kB/s]
+ 25%|##4       | 10.2M/41.5M [02:14&lt;10:58, 49.8kB/s]
+ 25%|##4       | 10.2M/41.5M [02:14&lt;08:00, 68.2kB/s]
+ 25%|##4       | 10.2M/41.5M [02:15&lt;08:12, 66.6kB/s]
+ 25%|##4       | 10.3M/41.5M [02:15&lt;08:23, 65.1kB/s]
+ 25%|##4       | 10.3M/41.5M [02:15&lt;11:46, 46.3kB/s]
+ 25%|##4       | 10.3M/41.5M [02:16&lt;08:30, 64.0kB/s]
+ 25%|##4       | 10.3M/41.5M [02:16&lt;07:52, 69.1kB/s]
+ 25%|##4       | 10.3M/41.5M [02:16&lt;08:51, 61.4kB/s]
+ 25%|##4       | 10.4M/41.5M [02:16&lt;08:03, 67.5kB/s]
+ 25%|##5       | 10.4M/41.5M [02:17&lt;07:26, 73.0kB/s]
+ 25%|##5       | 10.4M/41.5M [02:17&lt;10:33, 51.5kB/s]
+ 25%|##5       | 10.4M/41.5M [02:17&lt;06:57, 78.0kB/s]
+ 25%|##5       | 10.4M/41.5M [02:18&lt;08:17, 65.4kB/s]
+ 25%|##5       | 10.5M/41.5M [02:18&lt;08:58, 60.4kB/s]
+ 25%|##5       | 10.5M/41.5M [02:18&lt;09:23, 57.7kB/s]
+ 25%|##5       | 10.5M/41.5M [02:18&lt;09:48, 55.3kB/s]
+ 25%|##5       | 10.5M/41.5M [02:18&lt;10:11, 53.2kB/s]
+ 25%|##5       | 10.5M/41.5M [02:19&lt;08:34, 63.1kB/s]
+ 25%|##5       | 10.5M/41.5M [02:19&lt;09:13, 58.7kB/s]
+ 25%|##5       | 10.5M/41.5M [02:19&lt;09:47, 55.3kB/s]
+ 25%|##5       | 10.5M/41.5M [02:19&lt;08:13, 65.9kB/s]
+ 25%|##5       | 10.5M/41.5M [02:19&lt;08:58, 60.3kB/s]
+ 25%|##5       | 10.5M/41.5M [02:20&lt;07:45, 69.7kB/s]
+ 25%|##5       | 10.6M/41.5M [02:20&lt;08:35, 62.9kB/s]
+ 25%|##5       | 10.6M/41.5M [02:20&lt;07:32, 71.7kB/s]
+ 26%|##5       | 10.6M/41.5M [02:20&lt;08:59, 60.1kB/s]
+ 26%|##5       | 10.6M/41.5M [02:20&lt;06:46, 79.6kB/s]
+ 26%|##5       | 10.6M/41.5M [02:21&lt;06:29, 83.0kB/s]
+ 26%|##5       | 10.6M/41.5M [02:21&lt;08:00, 67.4kB/s]
+ 26%|##5       | 10.7M/41.5M [02:21&lt;07:21, 73.3kB/s]
+ 26%|##5       | 10.7M/41.5M [02:21&lt;06:53, 78.2kB/s]
+ 26%|##5       | 10.7M/41.5M [02:22&lt;08:18, 64.8kB/s]
+ 26%|##5       | 10.7M/41.5M [02:22&lt;06:34, 81.9kB/s]
+ 26%|##5       | 10.7M/41.5M [02:22&lt;09:51, 54.5kB/s]
+ 26%|##5       | 10.8M/41.5M [02:23&lt;07:39, 70.1kB/s]
+ 26%|##5       | 10.8M/41.5M [02:23&lt;08:40, 61.9kB/s]
+ 26%|##5       | 10.8M/41.5M [02:23&lt;08:41, 61.7kB/s]
+ 26%|##6       | 10.8M/41.5M [02:23&lt;08:13, 65.3kB/s]
+ 26%|##6       | 10.8M/41.5M [02:23&lt;08:19, 64.4kB/s]
+ 26%|##6       | 10.8M/41.5M [02:24&lt;07:55, 67.7kB/s]
+ 26%|##6       | 10.8M/41.5M [02:24&lt;08:05, 66.2kB/s]
+ 26%|##6       | 10.8M/41.5M [02:24&lt;07:45, 69.1kB/s]
+ 26%|##6       | 10.8M/41.5M [02:24&lt;07:58, 67.2kB/s]
+ 26%|##6       | 10.9M/41.5M [02:24&lt;07:09, 74.8kB/s]
+ 26%|##6       | 10.9M/41.5M [02:24&lt;06:40, 80.1kB/s]
+ 26%|##6       | 10.9M/41.5M [02:25&lt;06:22, 83.9kB/s]
+ 26%|##6       | 10.9M/41.5M [02:25&lt;06:10, 86.6kB/s]
+ 26%|##6       | 10.9M/41.5M [02:25&lt;05:35, 95.5kB/s]
+ 26%|##6       | 10.9M/41.5M [02:25&lt;06:51, 77.8kB/s]
+ 26%|##6       | 11.0M/41.5M [02:26&lt;06:03, 88.0kB/s]
+ 26%|##6       | 11.0M/41.5M [02:26&lt;07:29, 71.2kB/s]
+ 27%|##6       | 11.0M/41.5M [02:26&lt;06:09, 86.4kB/s]
+ 27%|##6       | 11.0M/41.5M [02:26&lt;07:31, 70.8kB/s]
+ 27%|##6       | 11.0M/41.5M [02:27&lt;08:07, 65.5kB/s]
+ 27%|##6       | 11.0M/41.5M [02:27&lt;08:43, 60.9kB/s]
+ 27%|##6       | 11.0M/41.5M [02:27&lt;09:17, 57.3kB/s]
+ 27%|##6       | 11.1M/41.5M [02:27&lt;07:58, 66.7kB/s]
+ 27%|##6       | 11.1M/41.5M [02:27&lt;08:42, 61.1kB/s]
+ 27%|##6       | 11.1M/41.5M [02:28&lt;09:19, 57.0kB/s]
+ 27%|##6       | 11.1M/41.5M [02:28&lt;10:10, 52.2kB/s]
+ 27%|##6       | 11.1M/41.5M [02:28&lt;08:33, 62.0kB/s]
+ 27%|##6       | 11.1M/41.5M [02:28&lt;09:09, 58.0kB/s]
+ 27%|##6       | 11.1M/41.5M [02:28&lt;09:40, 54.9kB/s]
+ 27%|##6       | 11.1M/41.5M [02:29&lt;08:06, 65.4kB/s]
+ 27%|##6       | 11.1M/41.5M [02:29&lt;08:50, 60.0kB/s]
+ 27%|##6       | 11.2M/41.5M [02:29&lt;07:38, 69.4kB/s]
+ 27%|##6       | 11.2M/41.5M [02:29&lt;08:26, 62.7kB/s]
+ 27%|##6       | 11.2M/41.5M [02:29&lt;09:08, 57.9kB/s]
+ 27%|##6       | 11.2M/41.5M [02:29&lt;07:45, 68.2kB/s]
+ 27%|##7       | 11.2M/41.5M [02:30&lt;07:00, 75.5kB/s]
+ 27%|##7       | 11.2M/41.5M [02:30&lt;06:33, 80.7kB/s]
+ 27%|##7       | 11.2M/41.5M [02:30&lt;06:16, 84.3kB/s]
+ 27%|##7       | 11.3M/41.5M [02:30&lt;06:05, 86.8kB/s]
+ 27%|##7       | 11.3M/41.5M [02:30&lt;05:57, 88.6kB/s]
+ 27%|##7       | 11.3M/41.5M [02:31&lt;05:52, 89.8kB/s]
+ 27%|##7       | 11.3M/41.5M [02:31&lt;05:48, 90.7kB/s]
+ 27%|##7       | 11.3M/41.5M [02:31&lt;05:46, 91.3kB/s]
+ 27%|##7       | 11.3M/41.5M [02:31&lt;05:44, 91.7kB/s]
+ 27%|##7       | 11.4M/41.5M [02:31&lt;04:58, 106kB/s]
+ 27%|##7       | 11.4M/41.5M [02:31&lt;05:09, 102kB/s]
+ 27%|##7       | 11.4M/41.5M [02:32&lt;05:18, 99.2kB/s]
+ 28%|##7       | 11.4M/41.5M [02:32&lt;06:08, 85.5kB/s]
+ 28%|##7       | 11.4M/41.5M [02:32&lt;05:19, 98.7kB/s]
+ 28%|##7       | 11.5M/41.5M [02:32&lt;05:24, 97.0kB/s]
+ 28%|##7       | 11.5M/41.5M [02:33&lt;06:57, 75.5kB/s]
+ 28%|##7       | 11.5M/41.5M [02:33&lt;06:36, 79.3kB/s]
+ 28%|##7       | 11.5M/41.5M [02:33&lt;06:20, 82.7kB/s]
+ 28%|##7       | 11.5M/41.5M [02:34&lt;09:21, 56.0kB/s]
+ 28%|##7       | 11.5M/41.5M [02:34&lt;07:14, 72.3kB/s]
+ 28%|##7       | 11.6M/41.5M [02:34&lt;07:10, 72.9kB/s]
+ 28%|##7       | 11.6M/41.5M [02:34&lt;07:55, 66.0kB/s]
+ 28%|##7       | 11.6M/41.5M [02:34&lt;07:16, 71.8kB/s]
+ 28%|##7       | 11.6M/41.5M [02:35&lt;06:48, 76.8kB/s]
+ 28%|##8       | 11.6M/41.5M [02:35&lt;06:51, 76.2kB/s]
+ 28%|##8       | 11.6M/41.5M [02:35&lt;07:08, 73.1kB/s]
+ 28%|##8       | 11.6M/41.5M [02:35&lt;06:38, 78.6kB/s]
+ 28%|##8       | 11.7M/41.5M [02:35&lt;06:18, 82.6kB/s]
+ 28%|##8       | 11.7M/41.5M [02:35&lt;06:05, 85.6kB/s]
+ 28%|##8       | 11.7M/41.5M [02:36&lt;05:56, 87.7kB/s]
+ 28%|##8       | 11.7M/41.5M [02:36&lt;05:03, 103kB/s]
+ 28%|##8       | 11.7M/41.5M [02:36&lt;07:07, 73.0kB/s]
+ 28%|##8       | 11.8M/41.5M [02:36&lt;04:55, 105kB/s]
+ 28%|##8       | 11.8M/41.5M [02:37&lt;05:24, 96.0kB/s]
+ 28%|##8       | 11.8M/41.5M [02:37&lt;05:27, 95.2kB/s]
+ 28%|##8       | 11.8M/41.5M [02:37&lt;06:37, 78.3kB/s]
+ 29%|##8       | 11.8M/41.5M [02:37&lt;05:34, 93.1kB/s]
+ 29%|##8       | 11.8M/41.5M [02:38&lt;08:26, 61.4kB/s]
+ 29%|##8       | 11.9M/41.5M [02:38&lt;06:25, 80.6kB/s]
+ 29%|##8       | 11.9M/41.5M [02:38&lt;06:28, 79.9kB/s]
+ 29%|##8       | 11.9M/41.5M [02:38&lt;06:14, 82.7kB/s]
+ 29%|##8       | 11.9M/41.5M [02:39&lt;06:04, 85.2kB/s]
+ 29%|##8       | 11.9M/41.5M [02:39&lt;07:08, 72.2kB/s]
+ 29%|##8       | 12.0M/41.5M [02:39&lt;07:00, 73.7kB/s]
+ 29%|##8       | 12.0M/41.5M [02:39&lt;06:35, 78.3kB/s]
+ 29%|##8       | 12.0M/41.5M [02:39&lt;06:17, 82.0kB/s]
+ 29%|##8       | 12.0M/41.5M [02:40&lt;07:42, 66.9kB/s]
+ 29%|##8       | 12.0M/41.5M [02:40&lt;05:52, 87.5kB/s]
+ 29%|##9       | 12.0M/41.5M [02:40&lt;07:16, 70.7kB/s]
+ 29%|##9       | 12.1M/41.5M [02:40&lt;06:47, 75.7kB/s]
+ 29%|##9       | 12.1M/41.5M [02:41&lt;06:44, 76.2kB/s]
+ 29%|##9       | 12.1M/41.5M [02:41&lt;07:40, 66.9kB/s]
+ 29%|##9       | 12.1M/41.5M [02:41&lt;07:03, 72.8kB/s]
+ 29%|##9       | 12.1M/41.5M [02:41&lt;07:44, 66.3kB/s]
+ 29%|##9       | 12.1M/41.5M [02:42&lt;07:00, 73.2kB/s]
+ 29%|##9       | 12.1M/41.5M [02:42&lt;06:32, 78.5kB/s]
+ 29%|##9       | 12.1M/41.5M [02:42&lt;09:25, 54.4kB/s]
+ 29%|##9       | 12.2M/41.5M [02:42&lt;05:59, 85.5kB/s]
+ 29%|##9       | 12.2M/41.5M [02:43&lt;08:49, 58.0kB/s]
+ 29%|##9       | 12.2M/41.5M [02:43&lt;06:12, 82.5kB/s]
+ 30%|##9       | 12.2M/41.5M [02:43&lt;06:40, 76.6kB/s]
+ 30%|##9       | 12.3M/41.5M [02:44&lt;07:05, 72.1kB/s]
+ 30%|##9       | 12.3M/41.5M [02:44&lt;08:30, 60.0kB/s]
+ 30%|##9       | 12.3M/41.5M [02:44&lt;07:17, 70.0kB/s]
+ 30%|##9       | 12.3M/41.5M [02:44&lt;06:47, 75.2kB/s]
+ 30%|##9       | 12.3M/41.5M [02:45&lt;07:59, 63.8kB/s]
+ 30%|##9       | 12.3M/41.5M [02:45&lt;08:51, 57.5kB/s]
+ 30%|##9       | 12.3M/41.5M [02:45&lt;09:12, 55.3kB/s]
+ 30%|##9       | 12.4M/41.5M [02:45&lt;07:58, 63.8kB/s]
+ 30%|##9       | 12.4M/41.5M [02:45&lt;08:32, 59.5kB/s]
+ 30%|##9       | 12.4M/41.5M [02:46&lt;07:27, 68.1kB/s]
+ 30%|##9       | 12.4M/41.5M [02:46&lt;06:48, 74.6kB/s]
+ 30%|##9       | 12.4M/41.5M [02:46&lt;07:35, 66.9kB/s]
+ 30%|##9       | 12.4M/41.5M [02:46&lt;06:51, 74.1kB/s]
+ 30%|##9       | 12.4M/41.5M [02:47&lt;08:13, 61.7kB/s]
+ 30%|###       | 12.5M/41.5M [02:47&lt;06:40, 76.0kB/s]
+ 30%|###       | 12.5M/41.5M [02:47&lt;06:56, 73.1kB/s]
+ 30%|###       | 12.5M/41.5M [02:47&lt;07:42, 65.8kB/s]
+ 30%|###       | 12.5M/41.5M [02:47&lt;08:50, 57.3kB/s]
+ 30%|###       | 12.5M/41.5M [02:48&lt;06:35, 76.8kB/s]
+ 30%|###       | 12.5M/41.5M [02:48&lt;06:39, 76.1kB/s]
+ 30%|###       | 12.5M/41.5M [02:48&lt;06:55, 73.1kB/s]
+ 30%|###       | 12.5M/41.5M [02:48&lt;07:42, 65.6kB/s]
+ 30%|###       | 12.6M/41.5M [02:48&lt;06:53, 73.4kB/s]
+ 30%|###       | 12.6M/41.5M [02:48&lt;06:23, 79.0kB/s]
+ 30%|###       | 12.6M/41.5M [02:49&lt;08:18, 60.8kB/s]
+ 30%|###       | 12.6M/41.5M [02:49&lt;06:00, 83.9kB/s]
+ 30%|###       | 12.6M/41.5M [02:49&lt;06:35, 76.4kB/s]
+ 30%|###       | 12.6M/41.5M [02:49&lt;06:15, 80.5kB/s]
+ 31%|###       | 12.7M/41.5M [02:50&lt;06:49, 73.8kB/s]
+ 31%|###       | 12.7M/41.5M [02:50&lt;06:24, 78.5kB/s]
+ 31%|###       | 12.7M/41.5M [02:50&lt;06:07, 82.3kB/s]
+ 31%|###       | 12.7M/41.5M [02:50&lt;06:17, 80.0kB/s]
+ 31%|###       | 12.7M/41.5M [02:50&lt;06:26, 78.1kB/s]
+ 31%|###       | 12.7M/41.5M [02:51&lt;05:43, 87.9kB/s]
+ 31%|###       | 12.8M/41.5M [02:51&lt;06:02, 83.1kB/s]
+ 31%|###       | 12.8M/41.5M [02:51&lt;05:50, 85.8kB/s]
+ 31%|###       | 12.8M/41.5M [02:51&lt;05:42, 87.8kB/s]
+ 31%|###       | 12.8M/41.5M [02:51&lt;05:12, 96.3kB/s]
+ 31%|###       | 12.8M/41.5M [02:51&lt;05:15, 95.2kB/s]
+ 31%|###       | 12.8M/41.5M [02:52&lt;05:43, 87.4kB/s]
+ 31%|###       | 12.9M/41.5M [02:52&lt;04:53, 102kB/s]
+ 31%|###1      | 12.9M/41.5M [02:52&lt;05:21, 93.3kB/s]
+ 31%|###1      | 12.9M/41.5M [02:52&lt;06:52, 72.7kB/s]
+ 31%|###1      | 12.9M/41.5M [02:53&lt;05:37, 88.7kB/s]
+ 31%|###1      | 12.9M/41.5M [02:53&lt;05:34, 89.6kB/s]
+ 31%|###1      | 13.0M/41.5M [02:53&lt;06:54, 72.3kB/s]
+ 31%|###1      | 13.0M/41.5M [02:54&lt;08:18, 60.0kB/s]
+ 31%|###1      | 13.0M/41.5M [02:54&lt;07:29, 66.5kB/s]
+ 31%|###1      | 13.0M/41.5M [02:54&lt;08:01, 62.1kB/s]
+ 31%|###1      | 13.0M/41.5M [02:54&lt;08:32, 58.3kB/s]
+ 31%|###1      | 13.0M/41.5M [02:54&lt;08:25, 59.1kB/s]
+ 31%|###1      | 13.0M/41.5M [02:55&lt;08:57, 55.6kB/s]
+ 31%|###1      | 13.0M/41.5M [02:55&lt;09:23, 52.9kB/s]
+ 31%|###1      | 13.0M/41.5M [02:55&lt;12:34, 39.6kB/s]
+ 31%|###1      | 13.0M/41.5M [02:55&lt;09:28, 52.5kB/s]
+ 31%|###1      | 13.1M/41.5M [02:55&lt;09:45, 50.9kB/s]
+ 32%|###1      | 13.1M/41.5M [02:56&lt;08:33, 58.1kB/s]
+ 32%|###1      | 13.1M/41.5M [02:56&lt;09:01, 55.0kB/s]
+ 32%|###1      | 13.1M/41.5M [02:56&lt;08:45, 56.7kB/s]
+ 32%|###1      | 13.1M/41.5M [02:56&lt;08:25, 58.8kB/s]
+ 32%|###1      | 13.1M/41.5M [02:56&lt;08:23, 59.1kB/s]
+ 32%|###1      | 13.1M/41.5M [02:56&lt;08:16, 60.0kB/s]
+ 32%|###1      | 13.1M/41.5M [02:57&lt;08:22, 59.1kB/s]
+ 32%|###1      | 13.1M/41.5M [02:57&lt;07:05, 69.9kB/s]
+ 32%|###1      | 13.1M/41.5M [02:57&lt;07:06, 69.7kB/s]
+ 32%|###1      | 13.2M/41.5M [02:57&lt;07:05, 69.8kB/s]
+ 32%|###1      | 13.2M/41.5M [02:57&lt;08:57, 55.3kB/s]
+ 32%|###1      | 13.2M/41.5M [02:58&lt;09:19, 53.0kB/s]
+ 32%|###1      | 13.2M/41.5M [02:58&lt;10:33, 46.8kB/s]
+ 32%|###1      | 13.2M/41.5M [02:58&lt;07:25, 66.5kB/s]
+ 32%|###1      | 13.2M/41.5M [02:58&lt;07:59, 61.8kB/s]
+ 32%|###1      | 13.2M/41.5M [02:59&lt;08:31, 58.0kB/s]
+ 32%|###1      | 13.3M/41.5M [02:59&lt;07:21, 67.0kB/s]
+ 32%|###1      | 13.3M/41.5M [02:59&lt;12:25, 39.7kB/s]
+ 32%|###2      | 13.3M/41.5M [03:00&lt;08:15, 59.7kB/s]
+ 32%|###2      | 13.3M/41.5M [03:00&lt;08:40, 56.7kB/s]
+ 32%|###2      | 13.3M/41.5M [03:00&lt;09:04, 54.2kB/s]
+ 32%|###2      | 13.3M/41.5M [03:00&lt;07:40, 64.1kB/s]
+ 32%|###2      | 13.3M/41.5M [03:00&lt;08:17, 59.4kB/s]
+ 32%|###2      | 13.3M/41.5M [03:01&lt;09:10, 53.6kB/s]
+ 32%|###2      | 13.4M/41.5M [03:01&lt;07:48, 62.9kB/s]
+ 32%|###2      | 13.4M/41.5M [03:01&lt;08:22, 58.7kB/s]
+ 32%|###2      | 13.4M/41.5M [03:01&lt;08:51, 55.5kB/s]
+ 32%|###2      | 13.4M/41.5M [03:02&lt;11:34, 42.4kB/s]
+ 32%|###2      | 13.4M/41.5M [03:02&lt;07:55, 62.0kB/s]
+ 32%|###2      | 13.4M/41.5M [03:02&lt;08:23, 58.5kB/s]
+ 32%|###2      | 13.4M/41.5M [03:02&lt;07:20, 66.8kB/s]
+ 32%|###2      | 13.4M/41.5M [03:02&lt;07:57, 61.5kB/s]
+ 32%|###2      | 13.5M/41.5M [03:03&lt;10:46, 45.5kB/s]
+ 33%|###2      | 13.5M/41.5M [03:03&lt;07:37, 64.1kB/s]
+ 33%|###2      | 13.5M/41.5M [03:03&lt;08:07, 60.2kB/s]
+ 33%|###2      | 13.5M/41.5M [03:03&lt;08:36, 56.9kB/s]
+ 33%|###2      | 13.5M/41.5M [03:04&lt;07:24, 66.0kB/s]
+ 33%|###2      | 13.5M/41.5M [03:04&lt;10:12, 47.9kB/s]
+ 33%|###2      | 13.5M/41.5M [03:04&lt;07:04, 69.0kB/s]
+ 33%|###2      | 13.6M/41.5M [03:04&lt;07:41, 63.4kB/s]
+ 33%|###2      | 13.6M/41.5M [03:04&lt;08:16, 58.9kB/s]
+ 33%|###2      | 13.6M/41.5M [03:05&lt;08:47, 55.5kB/s]
+ 33%|###2      | 13.6M/41.5M [03:05&lt;09:12, 53.0kB/s]
+ 33%|###2      | 13.6M/41.5M [03:05&lt;07:34, 64.3kB/s]
+ 33%|###2      | 13.6M/41.5M [03:05&lt;08:14, 59.1kB/s]
+ 33%|###2      | 13.6M/41.5M [03:05&lt;08:47, 55.4kB/s]
+ 33%|###2      | 13.6M/41.5M [03:06&lt;07:19, 66.4kB/s]
+ 33%|###2      | 13.6M/41.5M [03:06&lt;08:03, 60.5kB/s]
+ 33%|###2      | 13.6M/41.5M [03:06&lt;06:56, 70.1kB/s]
+ 33%|###2      | 13.7M/41.5M [03:06&lt;06:19, 76.8kB/s]
+ 33%|###2      | 13.7M/41.5M [03:06&lt;07:44, 62.8kB/s]
+ 33%|###3      | 13.7M/41.5M [03:07&lt;05:56, 81.7kB/s]
+ 33%|###3      | 13.7M/41.5M [03:07&lt;05:44, 84.6kB/s]
+ 33%|###3      | 13.7M/41.5M [03:07&lt;05:35, 86.8kB/s]
+ 33%|###3      | 13.8M/41.5M [03:07&lt;05:28, 88.5kB/s]
+ 33%|###3      | 13.8M/41.5M [03:07&lt;05:24, 89.7kB/s]
+ 33%|###3      | 13.8M/41.5M [03:07&lt;05:20, 90.6kB/s]
+ 33%|###3      | 13.8M/41.5M [03:08&lt;05:18, 91.2kB/s]
+ 33%|###3      | 13.8M/41.5M [03:08&lt;05:16, 91.7kB/s]
+ 33%|###3      | 13.8M/41.5M [03:08&lt;05:15, 92.0kB/s]
+ 33%|###3      | 13.8M/41.5M [03:08&lt;05:14, 92.2kB/s]
+ 33%|###3      | 13.9M/41.5M [03:08&lt;05:13, 92.4kB/s]
+ 33%|###3      | 13.9M/41.5M [03:09&lt;06:46, 71.2kB/s]
+ 33%|###3      | 13.9M/41.5M [03:09&lt;05:29, 87.9kB/s]
+ 34%|###3      | 13.9M/41.5M [03:09&lt;05:24, 89.2kB/s]
+ 34%|###3      | 13.9M/41.5M [03:09&lt;05:20, 90.1kB/s]
+ 34%|###3      | 13.9M/41.5M [03:09&lt;05:17, 90.9kB/s]
+ 34%|###3      | 14.0M/41.5M [03:10&lt;05:15, 91.4kB/s]
+ 34%|###3      | 14.0M/41.5M [03:10&lt;05:35, 85.9kB/s]
+ 34%|###3      | 14.0M/41.5M [03:10&lt;05:07, 93.9kB/s]
+ 34%|###3      | 14.0M/41.5M [03:10&lt;05:08, 93.6kB/s]
+ 34%|###3      | 14.0M/41.5M [03:10&lt;05:08, 93.3kB/s]
+ 34%|###3      | 14.0M/41.5M [03:10&lt;05:09, 93.1kB/s]
+ 34%|###3      | 14.1M/41.5M [03:11&lt;05:09, 93.0kB/s]
+ 34%|###3      | 14.1M/41.5M [03:11&lt;04:29, 107kB/s]
+ 34%|###3      | 14.1M/41.5M [03:11&lt;04:39, 103kB/s]
+ 34%|###4      | 14.1M/41.5M [03:11&lt;04:48, 99.7kB/s]
+ 34%|###4      | 14.1M/41.5M [03:11&lt;04:17, 111kB/s]
+ 34%|###4      | 14.1M/41.5M [03:12&lt;04:30, 106kB/s]
+ 34%|###4      | 14.2M/41.5M [03:12&lt;05:00, 95.3kB/s]
+ 34%|###4      | 14.2M/41.5M [03:12&lt;04:43, 101kB/s]
+ 34%|###4      | 14.2M/41.5M [03:12&lt;04:50, 98.6kB/s]
+ 34%|###4      | 14.2M/41.5M [03:12&lt;06:25, 74.2kB/s]
+ 34%|###4      | 14.2M/41.5M [03:13&lt;07:54, 60.3kB/s]
+ 34%|###4      | 14.3M/41.5M [03:13&lt;05:38, 84.3kB/s]
+ 34%|###4      | 14.3M/41.5M [03:13&lt;05:48, 81.9kB/s]
+ 34%|###4      | 14.3M/41.5M [03:14&lt;06:36, 72.0kB/s]
+ 34%|###4      | 14.3M/41.5M [03:14&lt;08:54, 53.3kB/s]
+ 35%|###4      | 14.3M/41.5M [03:14&lt;06:10, 76.8kB/s]
+ 35%|###4      | 14.4M/41.5M [03:15&lt;08:36, 55.1kB/s]
+ 35%|###4      | 14.4M/41.5M [03:15&lt;06:36, 71.7kB/s]
+ 35%|###4      | 14.4M/41.5M [03:15&lt;06:53, 68.8kB/s]
+ 35%|###4      | 14.4M/41.5M [03:16&lt;08:39, 54.7kB/s]
+ 35%|###4      | 14.4M/41.5M [03:16&lt;06:30, 72.6kB/s]
+ 35%|###4      | 14.5M/41.5M [03:16&lt;07:26, 63.5kB/s]
+ 35%|###4      | 14.5M/41.5M [03:17&lt;06:48, 69.3kB/s]
+ 35%|###4      | 14.5M/41.5M [03:17&lt;06:20, 74.5kB/s]
+ 35%|###4      | 14.5M/41.5M [03:17&lt;05:58, 78.9kB/s]
+ 35%|###4      | 14.5M/41.5M [03:17&lt;07:10, 65.7kB/s]
+ 35%|###5      | 14.5M/41.5M [03:17&lt;06:33, 71.8kB/s]
+ 35%|###5      | 14.5M/41.5M [03:18&lt;06:07, 76.9kB/s]
+ 35%|###5      | 14.6M/41.5M [03:18&lt;05:48, 81.0kB/s]
+ 35%|###5      | 14.6M/41.5M [03:18&lt;05:35, 84.1kB/s]
+ 35%|###5      | 14.6M/41.5M [03:18&lt;05:25, 86.5kB/s]
+ 35%|###5      | 14.6M/41.5M [03:18&lt;06:50, 68.7kB/s]
+ 35%|###5      | 14.6M/41.5M [03:19&lt;04:51, 96.7kB/s]
+ 35%|###5      | 14.7M/41.5M [03:19&lt;04:54, 95.7kB/s]
+ 35%|###5      | 14.7M/41.5M [03:19&lt;04:56, 94.9kB/s]
+ 35%|###5      | 14.7M/41.5M [03:19&lt;06:20, 73.8kB/s]
+ 35%|###5      | 14.7M/41.5M [03:20&lt;06:28, 72.2kB/s]
+ 36%|###5      | 14.7M/41.5M [03:20&lt;05:24, 86.5kB/s]
+ 36%|###5      | 14.8M/41.5M [03:20&lt;05:18, 88.0kB/s]
+ 36%|###5      | 14.8M/41.5M [03:20&lt;05:14, 89.2kB/s]
+ 36%|###5      | 14.8M/41.5M [03:21&lt;06:32, 71.3kB/s]
+ 36%|###5      | 14.8M/41.5M [03:21&lt;07:07, 65.5kB/s]
+ 36%|###5      | 14.8M/41.5M [03:21&lt;08:03, 57.9kB/s]
+ 36%|###5      | 14.8M/41.5M [03:22&lt;08:25, 55.3kB/s]
+ 36%|###5      | 14.9M/41.5M [03:22&lt;07:31, 61.9kB/s]
+ 36%|###5      | 14.9M/41.5M [03:22&lt;07:51, 59.1kB/s]
+ 36%|###5      | 14.9M/41.5M [03:22&lt;08:13, 56.6kB/s]
+ 36%|###5      | 14.9M/41.5M [03:23&lt;07:11, 64.6kB/s]
+ 36%|###5      | 14.9M/41.5M [03:23&lt;07:42, 60.2kB/s]
+ 36%|###5      | 14.9M/41.5M [03:23&lt;08:11, 56.7kB/s]
+ 36%|###5      | 14.9M/41.5M [03:23&lt;06:59, 66.3kB/s]
+ 36%|###5      | 14.9M/41.5M [03:23&lt;07:37, 60.8kB/s]
+ 36%|###6      | 14.9M/41.5M [03:23&lt;08:10, 56.7kB/s]
+ 36%|###6      | 15.0M/41.5M [03:24&lt;06:54, 67.1kB/s]
+ 36%|###6      | 15.0M/41.5M [03:24&lt;07:35, 61.1kB/s]
+ 36%|###6      | 15.0M/41.5M [03:24&lt;06:35, 70.4kB/s]
+ 36%|###6      | 15.0M/41.5M [03:24&lt;07:18, 63.3kB/s]
+ 36%|###6      | 15.0M/41.5M [03:24&lt;06:25, 72.1kB/s]
+ 36%|###6      | 15.0M/41.5M [03:25&lt;05:54, 78.2kB/s]
+ 36%|###6      | 15.0M/41.5M [03:25&lt;05:36, 82.6kB/s]
+ 36%|###6      | 15.0M/41.5M [03:25&lt;07:01, 65.8kB/s]
+ 36%|###6      | 15.1M/41.5M [03:25&lt;09:27, 48.9kB/s]
+ 36%|###6      | 15.1M/41.5M [03:26&lt;05:50, 79.0kB/s]
+ 36%|###6      | 15.1M/41.5M [03:26&lt;05:36, 82.3kB/s]
+ 36%|###6      | 15.1M/41.5M [03:26&lt;08:11, 56.2kB/s]
+ 37%|###6      | 15.1M/41.5M [03:26&lt;05:39, 81.3kB/s]
+ 37%|###6      | 15.2M/41.5M [03:27&lt;05:29, 83.8kB/s]
+ 37%|###6      | 15.2M/41.5M [03:27&lt;08:11, 56.1kB/s]
+ 37%|###6      | 15.2M/41.5M [03:27&lt;06:10, 74.3kB/s]
+ 37%|###6      | 15.2M/41.5M [03:28&lt;07:06, 64.5kB/s]
+ 37%|###6      | 15.2M/41.5M [03:28&lt;06:32, 70.2kB/s]
+ 37%|###6      | 15.2M/41.5M [03:28&lt;07:27, 61.5kB/s]
+ 37%|###6      | 15.3M/41.5M [03:28&lt;06:44, 68.0kB/s]
+ 37%|###6      | 15.3M/41.5M [03:29&lt;06:13, 73.6kB/s]
+ 37%|###6      | 15.3M/41.5M [03:29&lt;05:50, 78.3kB/s]
+ 37%|###6      | 15.3M/41.5M [03:29&lt;05:34, 82.1kB/s]
+ 37%|###6      | 15.3M/41.5M [03:29&lt;05:22, 85.0kB/s]
+ 37%|###6      | 15.3M/41.5M [03:30&lt;08:10, 55.9kB/s]
+ 37%|###7      | 15.4M/41.5M [03:30&lt;07:33, 60.4kB/s]
+ 37%|###7      | 15.4M/41.5M [03:30&lt;06:01, 75.7kB/s]
+ 37%|###7      | 15.4M/41.5M [03:31&lt;11:02, 41.3kB/s]
+ 37%|###7      | 15.4M/41.5M [03:31&lt;08:30, 53.5kB/s]
+ 37%|###7      | 15.4M/41.5M [03:32&lt;10:03, 45.2kB/s]
+ 37%|###7      | 15.5M/41.5M [03:32&lt;10:01, 45.4kB/s]
+ 37%|###7      | 15.5M/41.5M [03:32&lt;09:58, 45.6kB/s]
+ 37%|###7      | 15.5M/41.5M [03:32&lt;09:56, 45.7kB/s]
+ 37%|###7      | 15.5M/41.5M [03:33&lt;09:54, 45.9kB/s]
+ 37%|###7      | 15.5M/41.5M [03:33&lt;09:52, 46.0kB/s]
+ 37%|###7      | 15.5M/41.5M [03:33&lt;09:51, 46.1kB/s]
+ 37%|###7      | 15.5M/41.5M [03:33&lt;12:28, 36.4kB/s]
+ 37%|###7      | 15.5M/41.5M [03:34&lt;11:18, 40.2kB/s]
+ 37%|###7      | 15.5M/41.5M [03:34&lt;10:56, 41.5kB/s]
+ 37%|###7      | 15.5M/41.5M [03:34&lt;10:38, 42.6kB/s]
+ 37%|###7      | 15.5M/41.5M [03:34&lt;08:13, 55.1kB/s]
+ 37%|###7      | 15.6M/41.5M [03:34&lt;10:50, 41.8kB/s]
+ 38%|###7      | 15.6M/41.5M [03:35&lt;08:54, 50.8kB/s]
+ 38%|###7      | 15.6M/41.5M [03:35&lt;09:05, 49.8kB/s]
+ 38%|###7      | 15.6M/41.5M [03:35&lt;08:43, 51.9kB/s]
+ 38%|###7      | 15.6M/41.5M [03:35&lt;08:58, 50.4kB/s]
+ 38%|###7      | 15.6M/41.5M [03:35&lt;09:11, 49.3kB/s]
+ 38%|###7      | 15.6M/41.5M [03:36&lt;09:20, 48.4kB/s]
+ 38%|###7      | 15.6M/41.5M [03:36&lt;07:22, 61.3kB/s]
+ 38%|###7      | 15.6M/41.5M [03:36&lt;07:56, 56.9kB/s]
+ 38%|###7      | 15.6M/41.5M [03:36&lt;06:41, 67.5kB/s]
+ 38%|###7      | 15.7M/41.5M [03:36&lt;07:22, 61.3kB/s]
+ 38%|###7      | 15.7M/41.5M [03:36&lt;06:23, 70.7kB/s]
+ 38%|###7      | 15.7M/41.5M [03:37&lt;07:06, 63.4kB/s]
+ 38%|###7      | 15.7M/41.5M [03:37&lt;08:06, 55.6kB/s]
+ 38%|###7      | 15.7M/41.5M [03:37&lt;06:15, 72.0kB/s]
+ 38%|###7      | 15.7M/41.5M [03:37&lt;06:28, 69.6kB/s]
+ 38%|###7      | 15.7M/41.5M [03:38&lt;07:33, 59.6kB/s]
+ 38%|###7      | 15.8M/41.5M [03:38&lt;06:22, 70.6kB/s]
+ 38%|###8      | 15.8M/41.5M [03:38&lt;06:32, 68.7kB/s]
+ 38%|###8      | 15.8M/41.5M [03:38&lt;05:59, 75.0kB/s]
+ 38%|###8      | 15.8M/41.5M [03:39&lt;08:29, 52.9kB/s]
+ 38%|###8      | 15.8M/41.5M [03:39&lt;06:08, 73.0kB/s]
+ 38%|###8      | 15.8M/41.5M [03:39&lt;06:29, 69.0kB/s]
+ 38%|###8      | 15.8M/41.5M [03:39&lt;06:34, 68.2kB/s]
+ 38%|###8      | 15.9M/41.5M [03:39&lt;05:59, 74.8kB/s]
+ 38%|###8      | 15.9M/41.5M [03:40&lt;08:32, 52.4kB/s]
+ 38%|###8      | 15.9M/41.5M [03:40&lt;06:07, 72.9kB/s]
+ 38%|###8      | 15.9M/41.5M [03:40&lt;05:45, 77.7kB/s]
+ 38%|###8      | 15.9M/41.5M [03:40&lt;06:34, 68.0kB/s]
+ 38%|###8      | 15.9M/41.5M [03:41&lt;06:02, 73.9kB/s]
+ 38%|###8      | 15.9M/41.5M [03:41&lt;08:22, 53.3kB/s]
+ 38%|###8      | 16.0M/41.5M [03:41&lt;06:08, 72.6kB/s]
+ 39%|###8      | 16.0M/41.5M [03:42&lt;08:32, 52.2kB/s]
+ 39%|###8      | 16.0M/41.5M [03:42&lt;07:12, 61.8kB/s]
+ 39%|###8      | 16.0M/41.5M [03:42&lt;07:54, 56.3kB/s]
+ 39%|###8      | 16.0M/41.5M [03:42&lt;08:12, 54.3kB/s]
+ 39%|###8      | 16.0M/41.5M [03:42&lt;07:04, 62.9kB/s]
+ 39%|###8      | 16.0M/41.5M [03:43&lt;09:23, 47.4kB/s]
+ 39%|###8      | 16.1M/41.5M [03:43&lt;07:45, 57.3kB/s]
+ 39%|###8      | 16.1M/41.5M [03:43&lt;08:07, 54.7kB/s]
+ 39%|###8      | 16.1M/41.5M [03:43&lt;08:39, 51.3kB/s]
+ 39%|###8      | 16.1M/41.5M [03:44&lt;07:19, 60.6kB/s]
+ 39%|###8      | 16.1M/41.5M [03:44&lt;07:45, 57.2kB/s]
+ 39%|###8      | 16.1M/41.5M [03:44&lt;08:09, 54.4kB/s]
+ 39%|###8      | 16.1M/41.5M [03:44&lt;06:50, 64.7kB/s]
+ 39%|###8      | 16.1M/41.5M [03:45&lt;15:59, 27.7kB/s]
+ 39%|###9      | 16.2M/41.5M [03:45&lt;06:29, 68.0kB/s]
+ 39%|###9      | 16.2M/41.5M [03:45&lt;06:13, 70.9kB/s]
+ 39%|###9      | 16.2M/41.5M [03:46&lt;08:06, 54.5kB/s]
+ 39%|###9      | 16.2M/41.5M [03:46&lt;06:22, 69.3kB/s]
+ 39%|###9      | 16.3M/41.5M [03:47&lt;07:09, 61.6kB/s]
+ 39%|###9      | 16.3M/41.5M [03:47&lt;07:46, 56.7kB/s]
+ 39%|###9      | 16.3M/41.5M [03:47&lt;07:15, 60.6kB/s]
+ 39%|###9      | 16.3M/41.5M [03:47&lt;07:37, 57.7kB/s]
+ 39%|###9      | 16.3M/41.5M [03:47&lt;07:31, 58.5kB/s]
+ 39%|###9      | 16.3M/41.5M [03:48&lt;07:56, 55.4kB/s]
+ 39%|###9      | 16.3M/41.5M [03:48&lt;08:50, 49.7kB/s]
+ 39%|###9      | 16.3M/41.5M [03:48&lt;08:25, 52.2kB/s]
+ 39%|###9      | 16.3M/41.5M [03:48&lt;07:22, 59.6kB/s]
+ 39%|###9      | 16.4M/41.5M [03:48&lt;07:18, 60.1kB/s]
+ 39%|###9      | 16.4M/41.5M [03:48&lt;07:50, 56.0kB/s]
+ 39%|###9      | 16.4M/41.5M [03:49&lt;08:15, 53.1kB/s]
+ 39%|###9      | 16.4M/41.5M [03:49&lt;06:45, 65.0kB/s]
+ 40%|###9      | 16.4M/41.5M [03:49&lt;07:23, 59.4kB/s]
+ 40%|###9      | 16.4M/41.5M [03:49&lt;06:44, 65.0kB/s]
+ 40%|###9      | 16.4M/41.5M [03:49&lt;06:52, 63.8kB/s]
+ 40%|###9      | 16.4M/41.5M [03:50&lt;06:01, 72.6kB/s]
+ 40%|###9      | 16.4M/41.5M [03:50&lt;05:56, 73.6kB/s]
+ 40%|###9      | 16.5M/41.5M [03:50&lt;05:31, 79.2kB/s]
+ 40%|###9      | 16.5M/41.5M [03:50&lt;05:51, 74.5kB/s]
+ 40%|###9      | 16.5M/41.5M [03:50&lt;07:07, 61.3kB/s]
+ 40%|###9      | 16.5M/41.5M [03:51&lt;04:59, 87.6kB/s]
+ 40%|###9      | 16.5M/41.5M [03:51&lt;04:54, 88.9kB/s]
+ 40%|###9      | 16.5M/41.5M [03:51&lt;05:12, 83.8kB/s]
+ 40%|###9      | 16.6M/41.5M [03:51&lt;06:01, 72.4kB/s]
+ 40%|###9      | 16.6M/41.5M [03:51&lt;06:34, 66.2kB/s]
+ 40%|###9      | 16.6M/41.5M [03:52&lt;05:58, 72.9kB/s]
+ 40%|####      | 16.6M/41.5M [03:52&lt;05:33, 78.2kB/s]
+ 40%|####      | 16.6M/41.5M [03:52&lt;05:17, 82.2kB/s]
+ 40%|####      | 16.6M/41.5M [03:52&lt;05:05, 85.2kB/s]
+ 40%|####      | 16.6M/41.5M [03:53&lt;06:24, 67.8kB/s]
+ 40%|####      | 16.7M/41.5M [03:53&lt;05:52, 73.9kB/s]
+ 40%|####      | 16.7M/41.5M [03:53&lt;05:30, 78.8kB/s]
+ 40%|####      | 16.7M/41.5M [03:53&lt;05:15, 82.5kB/s]
+ 40%|####      | 16.7M/41.5M [03:53&lt;06:28, 66.8kB/s]
+ 40%|####      | 16.7M/41.5M [03:54&lt;05:09, 83.8kB/s]
+ 40%|####      | 16.8M/41.5M [03:54&lt;05:04, 85.1kB/s]
+ 40%|####      | 16.8M/41.5M [03:54&lt;05:13, 82.6kB/s]
+ 40%|####      | 16.8M/41.5M [03:54&lt;05:03, 85.3kB/s]
+ 40%|####      | 16.8M/41.5M [03:54&lt;04:39, 92.6kB/s]
+ 41%|####      | 16.8M/41.5M [03:55&lt;04:56, 87.3kB/s]
+ 41%|####      | 16.8M/41.5M [03:55&lt;04:51, 88.9kB/s]
+ 41%|####      | 16.8M/41.5M [03:55&lt;04:47, 90.0kB/s]
+ 41%|####      | 16.9M/41.5M [03:55&lt;04:44, 90.8kB/s]
+ 41%|####      | 16.9M/41.5M [03:55&lt;05:44, 74.9kB/s]
+ 41%|####      | 16.9M/41.5M [03:56&lt;04:45, 90.3kB/s]
+ 41%|####      | 16.9M/41.5M [03:56&lt;04:43, 91.0kB/s]
+ 41%|####      | 16.9M/41.5M [03:56&lt;05:17, 81.1kB/s]
+ 41%|####      | 16.9M/41.5M [03:56&lt;07:24, 57.9kB/s]
+ 41%|####      | 17.0M/41.5M [03:57&lt;05:31, 77.6kB/s]
+ 41%|####      | 17.0M/41.5M [03:57&lt;06:30, 65.8kB/s]
+ 41%|####      | 17.0M/41.5M [03:57&lt;05:58, 71.5kB/s]
+ 41%|####1     | 17.0M/41.5M [03:57&lt;05:35, 76.5kB/s]
+ 41%|####1     | 17.0M/41.5M [03:58&lt;05:18, 80.5kB/s]
+ 41%|####1     | 17.0M/41.5M [03:58&lt;05:06, 83.7kB/s]
+ 41%|####1     | 17.1M/41.5M [03:58&lt;04:57, 86.2kB/s]
+ 41%|####1     | 17.1M/41.5M [03:58&lt;06:12, 68.8kB/s]
+ 41%|####1     | 17.1M/41.5M [03:58&lt;05:43, 74.5kB/s]
+ 41%|####1     | 17.1M/41.5M [03:59&lt;05:23, 79.1kB/s]
+ 41%|####1     | 17.1M/41.5M [03:59&lt;05:08, 82.7kB/s]
+ 41%|####1     | 17.1M/41.5M [03:59&lt;04:58, 85.5kB/s]
+ 41%|####1     | 17.2M/41.5M [03:59&lt;04:13, 101kB/s]
+ 41%|####1     | 17.2M/41.5M [03:59&lt;04:37, 92.0kB/s]
+ 41%|####1     | 17.2M/41.5M [03:59&lt;04:18, 98.6kB/s]
+ 41%|####1     | 17.2M/41.5M [04:00&lt;06:50, 62.0kB/s]
+ 42%|####1     | 17.2M/41.5M [04:00&lt;05:31, 76.7kB/s]
+ 42%|####1     | 17.2M/41.5M [04:00&lt;05:16, 80.3kB/s]
+ 42%|####1     | 17.3M/41.5M [04:01&lt;06:19, 66.9kB/s]
+ 42%|####1     | 17.3M/41.5M [04:01&lt;05:49, 72.6kB/s]
+ 42%|####1     | 17.3M/41.5M [04:01&lt;05:27, 77.4kB/s]
+ 42%|####1     | 17.3M/41.5M [04:01&lt;06:31, 64.8kB/s]
+ 42%|####1     | 17.3M/41.5M [04:02&lt;07:17, 57.9kB/s]
+ 42%|####1     | 17.3M/41.5M [04:02&lt;07:59, 52.8kB/s]
+ 42%|####1     | 17.4M/41.5M [04:02&lt;07:58, 52.9kB/s]
+ 42%|####1     | 17.4M/41.5M [04:02&lt;08:11, 51.5kB/s]
+ 42%|####1     | 17.4M/41.5M [04:03&lt;10:19, 40.8kB/s]
+ 42%|####1     | 17.4M/41.5M [04:03&lt;14:17, 29.5kB/s]
+ 42%|####1     | 17.4M/41.5M [04:04&lt;14:44, 28.6kB/s]
+ 42%|####1     | 17.4M/41.5M [04:04&lt;12:01, 35.0kB/s]
+ 42%|####1     | 17.4M/41.5M [04:04&lt;11:24, 36.9kB/s]
+ 42%|####1     | 17.4M/41.5M [04:05&lt;10:52, 38.7kB/s]
+ 42%|####2     | 17.4M/41.5M [04:05&lt;10:25, 40.3kB/s]
+ 42%|####2     | 17.4M/41.5M [04:05&lt;10:04, 41.7kB/s]
+ 42%|####2     | 17.4M/41.5M [04:05&lt;12:14, 34.3kB/s]
+ 42%|####2     | 17.5M/41.5M [04:05&lt;11:21, 37.0kB/s]
+ 42%|####2     | 17.5M/41.5M [04:06&lt;10:41, 39.3kB/s]
+ 42%|####2     | 17.5M/41.5M [04:06&lt;10:13, 41.1kB/s]
+ 42%|####2     | 17.5M/41.5M [04:06&lt;09:52, 42.5kB/s]
+ 42%|####2     | 17.5M/41.5M [04:06&lt;09:37, 43.6kB/s]
+ 42%|####2     | 17.5M/41.5M [04:07&lt;12:07, 34.6kB/s]
+ 42%|####2     | 17.5M/41.5M [04:07&lt;11:12, 37.4kB/s]
+ 42%|####2     | 17.5M/41.5M [04:07&lt;13:15, 31.6kB/s]
+ 42%|####2     | 17.5M/41.5M [04:07&lt;09:13, 45.4kB/s]
+ 42%|####2     | 17.5M/41.5M [04:08&lt;09:08, 45.8kB/s]
+ 42%|####2     | 17.6M/41.5M [04:08&lt;07:27, 56.0kB/s]
+ 42%|####2     | 17.6M/41.5M [04:08&lt;07:46, 53.7kB/s]
+ 42%|####2     | 17.6M/41.5M [04:08&lt;08:03, 51.9kB/s]
+ 42%|####2     | 17.6M/41.5M [04:08&lt;08:17, 50.4kB/s]
+ 42%|####2     | 17.6M/41.5M [04:08&lt;08:28, 49.3kB/s]
+ 42%|####2     | 17.6M/41.5M [04:09&lt;06:46, 61.6kB/s]
+ 42%|####2     | 17.6M/41.5M [04:09&lt;07:17, 57.2kB/s]
+ 42%|####2     | 17.6M/41.5M [04:09&lt;09:57, 41.9kB/s]
+ 42%|####2     | 17.6M/41.5M [04:09&lt;09:42, 43.0kB/s]
+ 42%|####2     | 17.6M/41.5M [04:10&lt;09:30, 43.8kB/s]
+ 43%|####2     | 17.6M/41.5M [04:10&lt;09:21, 44.5kB/s]
+ 43%|####2     | 17.6M/41.5M [04:10&lt;11:49, 35.2kB/s]
+ 43%|####2     | 17.7M/41.5M [04:10&lt;08:31, 48.9kB/s]
+ 43%|####2     | 17.7M/41.5M [04:11&lt;10:48, 38.5kB/s]
+ 43%|####2     | 17.7M/41.5M [04:11&lt;08:13, 50.6kB/s]
+ 43%|####2     | 17.7M/41.5M [04:11&lt;10:25, 39.9kB/s]
+ 43%|####2     | 17.7M/41.5M [04:11&lt;09:50, 42.2kB/s]
+ 43%|####2     | 17.7M/41.5M [04:12&lt;09:38, 43.1kB/s]
+ 43%|####2     | 17.7M/41.5M [04:12&lt;11:36, 35.8kB/s]
+ 43%|####2     | 17.7M/41.5M [04:12&lt;10:56, 38.0kB/s]
+ 43%|####2     | 17.7M/41.5M [04:12&lt;10:24, 39.9kB/s]
+ 43%|####2     | 17.8M/41.5M [04:13&lt;09:59, 41.5kB/s]
+ 43%|####2     | 17.8M/41.5M [04:13&lt;09:41, 42.8kB/s]
+ 43%|####2     | 17.8M/41.5M [04:13&lt;09:28, 43.7kB/s]
+ 43%|####2     | 17.8M/41.5M [04:13&lt;09:19, 44.5kB/s]
+ 43%|####2     | 17.8M/41.5M [04:13&lt;09:12, 45.0kB/s]
+ 43%|####2     | 17.8M/41.5M [04:14&lt;11:45, 35.2kB/s]
+ 43%|####2     | 17.8M/41.5M [04:14&lt;10:54, 37.9kB/s]
+ 43%|####2     | 17.8M/41.5M [04:14&lt;08:27, 49.0kB/s]
+ 43%|####2     | 17.8M/41.5M [04:14&lt;08:33, 48.3kB/s]
+ 43%|####2     | 17.8M/41.5M [04:15&lt;10:58, 37.7kB/s]
+ 43%|####2     | 17.8M/41.5M [04:15&lt;09:49, 42.1kB/s]
+ 43%|####3     | 17.8M/41.5M [04:15&lt;09:33, 43.2kB/s]
+ 43%|####3     | 17.9M/41.5M [04:15&lt;10:49, 38.2kB/s]
+ 43%|####3     | 17.9M/41.5M [04:15&lt;10:15, 40.2kB/s]
+ 43%|####3     | 17.9M/41.5M [04:15&lt;09:51, 41.8kB/s]
+ 43%|####3     | 17.9M/41.5M [04:16&lt;09:34, 43.1kB/s]
+ 43%|####3     | 17.9M/41.5M [04:16&lt;09:22, 44.0kB/s]
+ 43%|####3     | 17.9M/41.5M [04:16&lt;09:13, 44.7kB/s]
+ 43%|####3     | 17.9M/41.5M [04:16&lt;07:54, 52.2kB/s]
+ 43%|####3     | 17.9M/41.5M [04:16&lt;07:12, 57.2kB/s]
+ 43%|####3     | 17.9M/41.5M [04:17&lt;07:38, 54.0kB/s]
+ 43%|####3     | 17.9M/41.5M [04:17&lt;06:16, 65.7kB/s]
+ 43%|####3     | 17.9M/41.5M [04:17&lt;06:52, 59.9kB/s]
+ 43%|####3     | 18.0M/41.5M [04:17&lt;05:53, 69.8kB/s]
+ 43%|####3     | 18.0M/41.5M [04:17&lt;06:58, 58.9kB/s]
+ 43%|####3     | 18.0M/41.5M [04:18&lt;05:12, 78.9kB/s]
+ 43%|####3     | 18.0M/41.5M [04:18&lt;04:58, 82.5kB/s]
+ 43%|####3     | 18.0M/41.5M [04:18&lt;04:48, 85.3kB/s]
+ 43%|####3     | 18.0M/41.5M [04:18&lt;06:00, 68.3kB/s]
+ 44%|####3     | 18.1M/41.5M [04:18&lt;05:31, 74.0kB/s]
+ 44%|####3     | 18.1M/41.5M [04:19&lt;05:11, 78.7kB/s]
+ 44%|####3     | 18.1M/41.5M [04:19&lt;04:57, 82.4kB/s]
+ 44%|####3     | 18.1M/41.5M [04:19&lt;04:47, 85.3kB/s]
+ 44%|####3     | 18.1M/41.5M [04:19&lt;04:40, 87.4kB/s]
+ 44%|####3     | 18.1M/41.5M [04:20&lt;05:54, 69.1kB/s]
+ 44%|####3     | 18.2M/41.5M [04:20&lt;05:00, 81.4kB/s]
+ 44%|####3     | 18.2M/41.5M [04:20&lt;05:05, 80.1kB/s]
+ 44%|####3     | 18.2M/41.5M [04:20&lt;04:53, 83.2kB/s]
+ 44%|####3     | 18.2M/41.5M [04:20&lt;04:44, 85.8kB/s]
+ 44%|####3     | 18.2M/41.5M [04:20&lt;04:38, 87.7kB/s]
+ 44%|####3     | 18.2M/41.5M [04:21&lt;04:33, 89.1kB/s]
+ 44%|####4     | 18.3M/41.5M [04:21&lt;04:30, 90.2kB/s]
+ 44%|####4     | 18.3M/41.5M [04:21&lt;04:27, 90.9kB/s]
+ 44%|####4     | 18.3M/41.5M [04:21&lt;04:25, 91.5kB/s]
+ 44%|####4     | 18.3M/41.5M [04:21&lt;04:24, 91.8kB/s]
+ 44%|####4     | 18.3M/41.5M [04:22&lt;04:23, 92.1kB/s]
+ 44%|####4     | 18.3M/41.5M [04:22&lt;04:06, 98.5kB/s]
+ 44%|####4     | 18.4M/41.5M [04:22&lt;04:10, 96.7kB/s]
+ 44%|####4     | 18.4M/41.5M [04:22&lt;04:13, 95.5kB/s]
+ 44%|####4     | 18.4M/41.5M [04:22&lt;03:42, 109kB/s]
+ 44%|####4     | 18.4M/41.5M [04:22&lt;03:38, 111kB/s]
+ 44%|####4     | 18.4M/41.5M [04:23&lt;03:22, 119kB/s]
+ 44%|####4     | 18.5M/41.5M [04:23&lt;03:23, 119kB/s]
+ 45%|####4     | 18.5M/41.5M [04:23&lt;03:13, 125kB/s]
+ 45%|####4     | 18.5M/41.5M [04:23&lt;03:18, 122kB/s]
+ 45%|####4     | 18.5M/41.5M [04:23&lt;02:58, 135kB/s]
+ 45%|####4     | 18.5M/41.5M [04:23&lt;02:56, 136kB/s]
+ 45%|####4     | 18.6M/41.5M [04:24&lt;02:55, 137kB/s]
+ 45%|####4     | 18.6M/41.5M [04:24&lt;02:48, 143kB/s]
+ 45%|####4     | 18.6M/41.5M [04:24&lt;02:38, 151kB/s]
+ 45%|####4     | 18.6M/41.5M [04:24&lt;03:32, 113kB/s]
+ 45%|####5     | 18.7M/41.5M [04:25&lt;03:06, 128kB/s]
+ 45%|####5     | 18.7M/41.5M [04:25&lt;02:52, 139kB/s]
+ 45%|####5     | 18.7M/41.5M [04:25&lt;03:09, 126kB/s]
+ 45%|####5     | 18.8M/41.5M [04:25&lt;03:03, 130kB/s]
+ 45%|####5     | 18.8M/41.5M [04:25&lt;03:19, 119kB/s]
+ 45%|####5     | 18.8M/41.5M [04:26&lt;04:35, 86.5kB/s]
+ 45%|####5     | 18.8M/41.5M [04:26&lt;05:35, 70.9kB/s]
+ 45%|####5     | 18.8M/41.5M [04:26&lt;04:50, 81.7kB/s]
+ 45%|####5     | 18.8M/41.5M [04:27&lt;07:56, 49.9kB/s]
+ 45%|####5     | 18.9M/41.5M [04:27&lt;09:22, 42.2kB/s]
+ 45%|####5     | 18.9M/41.5M [04:28&lt;07:00, 56.4kB/s]
+ 46%|####5     | 18.9M/41.5M [04:28&lt;07:23, 53.4kB/s]
+ 46%|####5     | 18.9M/41.5M [04:28&lt;06:32, 60.3kB/s]
+ 46%|####5     | 18.9M/41.5M [04:28&lt;06:51, 57.5kB/s]
+ 46%|####5     | 18.9M/41.5M [04:28&lt;07:09, 55.0kB/s]
+ 46%|####5     | 18.9M/41.5M [04:29&lt;06:08, 64.1kB/s]
+ 46%|####5     | 19.0M/41.5M [04:29&lt;08:19, 47.3kB/s]
+ 46%|####5     | 19.0M/41.5M [04:29&lt;07:01, 56.1kB/s]
+ 46%|####5     | 19.0M/41.5M [04:30&lt;07:17, 54.0kB/s]
+ 46%|####5     | 19.0M/41.5M [04:30&lt;06:13, 63.1kB/s]
+ 46%|####5     | 19.0M/41.5M [04:30&lt;08:41, 45.2kB/s]
+ 46%|####5     | 19.0M/41.5M [04:30&lt;08:38, 45.4kB/s]
+ 46%|####5     | 19.0M/41.5M [04:31&lt;10:38, 36.9kB/s]
+ 46%|####5     | 19.0M/41.5M [04:31&lt;12:14, 32.1kB/s]
+ 46%|####5     | 19.0M/41.5M [04:31&lt;11:13, 35.0kB/s]
+ 46%|####5     | 19.0M/41.5M [04:32&lt;15:46, 24.9kB/s]
+ 46%|####5     | 19.0M/41.5M [04:32&lt;18:29, 21.2kB/s]
+ 46%|####5     | 19.1M/41.5M [04:33&lt;15:52, 24.7kB/s]
+ 46%|####5     | 19.1M/41.5M [04:33&lt;12:32, 31.2kB/s]
+ 46%|####6     | 19.1M/41.5M [04:34&lt;14:13, 27.5kB/s]
+ 46%|####6     | 19.1M/41.5M [04:34&lt;14:02, 27.9kB/s]
+ 46%|####6     | 19.1M/41.5M [04:34&lt;14:45, 26.5kB/s]
+ 46%|####6     | 19.1M/41.5M [04:34&lt;13:58, 28.0kB/s]
+ 46%|####6     | 19.1M/41.5M [04:35&lt;14:23, 27.2kB/s]
+ 46%|####6     | 19.1M/41.5M [04:35&lt;15:28, 25.3kB/s]
+ 46%|####6     | 19.1M/41.5M [04:35&lt;13:03, 29.9kB/s]
+ 46%|####6     | 19.1M/41.5M [04:35&lt;12:05, 32.3kB/s]
+ 46%|####6     | 19.1M/41.5M [04:36&lt;13:28, 29.0kB/s]
+ 46%|####6     | 19.2M/41.5M [04:36&lt;11:58, 32.6kB/s]
+ 46%|####6     | 19.2M/41.5M [04:36&lt;13:24, 29.1kB/s]
+ 46%|####6     | 19.2M/41.5M [04:37&lt;11:55, 32.7kB/s]
+ 46%|####6     | 19.2M/41.5M [04:37&lt;13:22, 29.1kB/s]
+ 46%|####6     | 19.2M/41.5M [04:37&lt;10:16, 37.9kB/s]
+ 46%|####6     | 19.2M/41.5M [04:37&lt;10:40, 36.5kB/s]
+ 46%|####6     | 19.2M/41.5M [04:38&lt;10:04, 38.6kB/s]
+ 46%|####6     | 19.2M/41.5M [04:38&lt;09:37, 40.5kB/s]
+ 46%|####6     | 19.2M/41.5M [04:38&lt;09:16, 42.0kB/s]
+ 46%|####6     | 19.2M/41.5M [04:38&lt;11:24, 34.1kB/s]
+ 46%|####6     | 19.2M/41.5M [04:39&lt;10:02, 38.7kB/s]
+ 46%|####6     | 19.3M/41.5M [04:39&lt;09:38, 40.3kB/s]
+ 46%|####6     | 19.3M/41.5M [04:39&lt;09:18, 41.7kB/s]
+ 46%|####6     | 19.3M/41.5M [04:39&lt;11:18, 34.3kB/s]
+ 46%|####6     | 19.3M/41.5M [04:40&lt;10:29, 37.0kB/s]
+ 46%|####6     | 19.3M/41.5M [04:40&lt;09:52, 39.3kB/s]
+ 47%|####6     | 19.3M/41.5M [04:40&lt;09:26, 41.1kB/s]
+ 47%|####6     | 19.3M/41.5M [04:40&lt;09:07, 42.5kB/s]
+ 47%|####6     | 19.3M/41.5M [04:40&lt;08:46, 44.2kB/s]
+ 47%|####6     | 19.3M/41.5M [04:41&lt;08:59, 43.1kB/s]
+ 47%|####6     | 19.4M/41.5M [04:41&lt;08:24, 46.0kB/s]
+ 47%|####6     | 19.4M/41.5M [04:41&lt;08:46, 44.1kB/s]
+ 47%|####6     | 19.4M/41.5M [04:42&lt;08:40, 44.6kB/s]
+ 47%|####6     | 19.4M/41.5M [04:42&lt;08:35, 45.0kB/s]
+ 47%|####6     | 19.4M/41.5M [04:42&lt;08:31, 45.3kB/s]
+ 47%|####6     | 19.4M/41.5M [04:42&lt;08:27, 45.6kB/s]
+ 47%|####6     | 19.4M/41.5M [04:42&lt;08:25, 45.8kB/s]
+ 47%|####6     | 19.4M/41.5M [04:42&lt;08:23, 46.0kB/s]
+ 47%|####6     | 19.4M/41.5M [04:43&lt;07:49, 49.3kB/s]
+ 47%|####6     | 19.4M/41.5M [04:43&lt;07:58, 48.4kB/s]
+ 47%|####6     | 19.4M/41.5M [04:43&lt;10:30, 36.7kB/s]
+ 47%|####6     | 19.5M/41.5M [04:43&lt;06:32, 58.8kB/s]
+ 47%|####6     | 19.5M/41.5M [04:43&lt;06:54, 55.7kB/s]
+ 47%|####6     | 19.5M/41.5M [04:44&lt;05:52, 65.5kB/s]
+ 47%|####6     | 19.5M/41.5M [04:44&lt;05:16, 73.0kB/s]
+ 47%|####6     | 19.5M/41.5M [04:44&lt;05:52, 65.5kB/s]
+ 47%|####7     | 19.5M/41.5M [04:44&lt;05:14, 73.3kB/s]
+ 47%|####7     | 19.5M/41.5M [04:44&lt;04:51, 79.0kB/s]
+ 47%|####7     | 19.5M/41.5M [04:45&lt;04:37, 83.0kB/s]
+ 47%|####7     | 19.6M/41.5M [04:45&lt;04:27, 85.9kB/s]
+ 47%|####7     | 19.6M/41.5M [04:45&lt;04:21, 87.9kB/s]
+ 47%|####7     | 19.6M/41.5M [04:45&lt;04:16, 89.4kB/s]
+ 47%|####7     | 19.6M/41.5M [04:45&lt;05:29, 69.6kB/s]
+ 47%|####7     | 19.6M/41.5M [04:46&lt;05:04, 75.3kB/s]
+ 47%|####7     | 19.6M/41.5M [04:46&lt;06:01, 63.3kB/s]
+ 47%|####7     | 19.7M/41.5M [04:46&lt;04:59, 76.3kB/s]
+ 47%|####7     | 19.7M/41.5M [04:46&lt;05:35, 68.1kB/s]
+ 47%|####7     | 19.7M/41.5M [04:47&lt;05:27, 69.8kB/s]
+ 47%|####7     | 19.7M/41.5M [04:47&lt;05:34, 68.2kB/s]
+ 48%|####7     | 19.7M/41.5M [04:47&lt;05:25, 70.2kB/s]
+ 48%|####7     | 19.7M/41.5M [04:47&lt;05:33, 68.3kB/s]
+ 48%|####7     | 19.7M/41.5M [04:47&lt;05:03, 75.2kB/s]
+ 48%|####7     | 19.8M/41.5M [04:48&lt;05:03, 75.0kB/s]
+ 48%|####7     | 19.8M/41.5M [04:48&lt;06:04, 62.6kB/s]
+ 48%|####7     | 19.8M/41.5M [04:48&lt;05:48, 65.2kB/s]
+ 48%|####7     | 19.8M/41.5M [04:48&lt;05:03, 74.9kB/s]
+ 48%|####7     | 19.8M/41.5M [04:49&lt;05:56, 63.7kB/s]
+ 48%|####7     | 19.8M/41.5M [04:49&lt;06:18, 59.9kB/s]
+ 48%|####7     | 19.8M/41.5M [04:49&lt;06:40, 56.7kB/s]
+ 48%|####7     | 19.9M/41.5M [04:49&lt;08:46, 43.1kB/s]
+ 48%|####7     | 19.9M/41.5M [04:50&lt;06:23, 59.2kB/s]
+ 48%|####7     | 19.9M/41.5M [04:50&lt;06:45, 55.8kB/s]
+ 48%|####7     | 19.9M/41.5M [04:50&lt;06:18, 59.8kB/s]
+ 48%|####7     | 19.9M/41.5M [04:50&lt;06:41, 56.3kB/s]
+ 48%|####7     | 19.9M/41.5M [04:51&lt;08:56, 42.2kB/s]
+ 48%|####8     | 19.9M/41.5M [04:51&lt;09:37, 39.1kB/s]
+ 48%|####8     | 19.9M/41.5M [04:52&lt;10:33, 35.7kB/s]
+ 48%|####8     | 20.0M/41.5M [04:52&lt;11:06, 33.9kB/s]
+ 48%|####8     | 20.0M/41.5M [04:53&lt;13:41, 27.5kB/s]
+ 48%|####8     | 20.0M/41.5M [04:53&lt;16:01, 23.5kB/s]
+ 48%|####8     | 20.0M/41.5M [04:53&lt;16:03, 23.4kB/s]
+ 48%|####8     | 20.0M/41.5M [04:54&lt;16:05, 23.4kB/s]
+ 48%|####8     | 20.0M/41.5M [04:54&lt;13:57, 26.9kB/s]
+ 48%|####8     | 20.0M/41.5M [04:54&lt;12:20, 30.4kB/s]
+ 48%|####8     | 20.0M/41.5M [04:55&lt;13:25, 28.0kB/s]
+ 48%|####8     | 20.0M/41.5M [04:55&lt;11:53, 31.6kB/s]
+ 48%|####8     | 20.0M/41.5M [04:55&lt;10:46, 34.8kB/s]
+ 48%|####8     | 20.0M/41.5M [04:55&lt;07:42, 48.6kB/s]
+ 48%|####8     | 20.0M/41.5M [04:55&lt;07:48, 48.0kB/s]
+ 48%|####8     | 20.1M/41.5M [04:55&lt;07:52, 47.6kB/s]
+ 48%|####8     | 20.1M/41.5M [04:56&lt;06:14, 60.0kB/s]
+ 48%|####8     | 20.1M/41.5M [04:56&lt;06:39, 56.2kB/s]
+ 48%|####8     | 20.1M/41.5M [04:56&lt;07:00, 53.4kB/s]
+ 48%|####8     | 20.1M/41.5M [04:56&lt;07:26, 50.2kB/s]
+ 49%|####8     | 20.1M/41.5M [04:57&lt;06:27, 57.8kB/s]
+ 49%|####8     | 20.1M/41.5M [04:57&lt;04:56, 75.6kB/s]
+ 49%|####8     | 20.2M/41.5M [04:57&lt;04:40, 79.7kB/s]
+ 49%|####8     | 20.2M/41.5M [04:57&lt;05:38, 66.1kB/s]
+ 49%|####8     | 20.2M/41.5M [04:58&lt;06:02, 61.7kB/s]
+ 49%|####8     | 20.2M/41.5M [04:58&lt;09:31, 39.0kB/s]
+ 49%|####8     | 20.2M/41.5M [04:59&lt;10:24, 35.7kB/s]
+ 49%|####8     | 20.2M/41.5M [04:59&lt;13:10, 28.2kB/s]
+ 49%|####8     | 20.2M/41.5M [04:59&lt;12:00, 30.9kB/s]
+ 49%|####8     | 20.2M/41.5M [05:00&lt;12:58, 28.6kB/s]
+ 49%|####8     | 20.2M/41.5M [05:00&lt;11:41, 31.7kB/s]
+ 49%|####8     | 20.2M/41.5M [05:00&lt;12:51, 28.9kB/s]
+ 49%|####8     | 20.3M/41.5M [05:00&lt;11:30, 32.3kB/s]
+ 49%|####8     | 20.3M/41.5M [05:01&lt;10:30, 35.3kB/s]
+ 49%|####8     | 20.3M/41.5M [05:01&lt;09:46, 37.9kB/s]
+ 49%|####8     | 20.3M/41.5M [05:01&lt;09:15, 40.1kB/s]
+ 49%|####8     | 20.3M/41.5M [05:01&lt;08:52, 41.7kB/s]
+ 49%|####8     | 20.3M/41.5M [05:01&lt;08:36, 43.0kB/s]
+ 49%|####8     | 20.3M/41.5M [05:02&lt;09:16, 39.9kB/s]
+ 49%|####8     | 20.3M/41.5M [05:02&lt;08:02, 46.0kB/s]
+ 49%|####8     | 20.3M/41.5M [05:02&lt;08:01, 46.1kB/s]
+ 49%|####8     | 20.3M/41.5M [05:02&lt;08:00, 46.2kB/s]
+ 49%|####9     | 20.3M/41.5M [05:02&lt;07:59, 46.2kB/s]
+ 49%|####9     | 20.4M/41.5M [05:02&lt;06:08, 60.1kB/s]
+ 49%|####9     | 20.4M/41.5M [05:03&lt;06:35, 56.0kB/s]
+ 49%|####9     | 20.4M/41.5M [05:03&lt;05:30, 67.0kB/s]
+ 49%|####9     | 20.4M/41.5M [05:03&lt;06:03, 60.8kB/s]
+ 49%|####9     | 20.4M/41.5M [05:03&lt;06:31, 56.5kB/s]
+ 49%|####9     | 20.4M/41.5M [05:03&lt;05:28, 67.3kB/s]
+ 49%|####9     | 20.4M/41.5M [05:03&lt;06:01, 61.1kB/s]
+ 49%|####9     | 20.4M/41.5M [05:04&lt;06:30, 56.6kB/s]
+ 49%|####9     | 20.4M/41.5M [05:04&lt;05:27, 67.5kB/s]
+ 49%|####9     | 20.4M/41.5M [05:04&lt;06:00, 61.2kB/s]
+ 49%|####9     | 20.5M/41.5M [05:04&lt;05:12, 70.6kB/s]
+ 49%|####9     | 20.5M/41.5M [05:04&lt;04:45, 77.3kB/s]
+ 49%|####9     | 20.5M/41.5M [05:04&lt;04:28, 81.9kB/s]
+ 49%|####9     | 20.5M/41.5M [05:05&lt;05:08, 71.3kB/s]
+ 49%|####9     | 20.5M/41.5M [05:05&lt;04:42, 77.7kB/s]
+ 49%|####9     | 20.5M/41.5M [05:05&lt;04:27, 82.2kB/s]
+ 50%|####9     | 20.5M/41.5M [05:05&lt;04:17, 85.3kB/s]
+ 50%|####9     | 20.6M/41.5M [05:05&lt;04:10, 87.6kB/s]
+ 50%|####9     | 20.6M/41.5M [05:06&lt;04:06, 89.1kB/s]
+ 50%|####9     | 20.6M/41.5M [05:06&lt;04:02, 90.2kB/s]
+ 50%|####9     | 20.6M/41.5M [05:06&lt;04:00, 91.0kB/s]
+ 50%|####9     | 20.6M/41.5M [05:06&lt;03:58, 91.6kB/s]
+ 50%|####9     | 20.6M/41.5M [05:06&lt;03:57, 92.0kB/s]
+ 50%|####9     | 20.7M/41.5M [05:06&lt;03:25, 106kB/s]
+ 50%|####9     | 20.7M/41.5M [05:07&lt;03:33, 102kB/s]
+ 50%|####9     | 20.7M/41.5M [05:07&lt;03:12, 113kB/s]
+ 50%|####9     | 20.7M/41.5M [05:07&lt;03:00, 121kB/s]
+ 50%|#####     | 20.8M/41.5M [05:07&lt;02:52, 126kB/s]
+ 50%|#####     | 20.8M/41.5M [05:07&lt;04:02, 89.5kB/s]
+ 50%|#####     | 20.8M/41.5M [05:08&lt;02:53, 125kB/s]
+ 50%|#####     | 20.8M/41.5M [05:08&lt;02:48, 128kB/s]
+ 50%|#####     | 20.8M/41.5M [05:08&lt;03:14, 111kB/s]
+ 50%|#####     | 20.9M/41.5M [05:08&lt;03:09, 114kB/s]
+ 50%|#####     | 20.9M/41.5M [05:08&lt;03:35, 100kB/s]
+ 50%|#####     | 20.9M/41.5M [05:09&lt;03:39, 98.3kB/s]
+ 50%|#####     | 20.9M/41.5M [05:09&lt;04:30, 79.7kB/s]
+ 50%|#####     | 20.9M/41.5M [05:09&lt;04:34, 78.5kB/s]
+ 50%|#####     | 20.9M/41.5M [05:09&lt;04:23, 81.7kB/s]
+ 51%|#####     | 21.0M/41.5M [05:09&lt;04:14, 84.6kB/s]
+ 51%|#####     | 21.0M/41.5M [05:10&lt;04:07, 86.9kB/s]
+ 51%|#####     | 21.0M/41.5M [05:10&lt;04:02, 88.5kB/s]
+ 51%|#####     | 21.0M/41.5M [05:10&lt;05:24, 66.1kB/s]
+ 51%|#####     | 21.0M/41.5M [05:10&lt;04:18, 83.0kB/s]
+ 51%|#####     | 21.0M/41.5M [05:11&lt;04:42, 75.8kB/s]
+ 51%|#####     | 21.1M/41.5M [05:11&lt;04:43, 75.5kB/s]
+ 51%|#####     | 21.1M/41.5M [05:11&lt;05:13, 68.4kB/s]
+ 51%|#####     | 21.1M/41.5M [05:11&lt;04:46, 74.7kB/s]
+ 51%|#####     | 21.1M/41.5M [05:11&lt;05:19, 67.1kB/s]
+ 51%|#####     | 21.1M/41.5M [05:12&lt;05:08, 69.3kB/s]
+ 51%|#####     | 21.1M/41.5M [05:12&lt;04:42, 75.7kB/s]
+ 51%|#####     | 21.1M/41.5M [05:12&lt;04:54, 72.6kB/s]
+ 51%|#####     | 21.1M/41.5M [05:12&lt;04:51, 73.1kB/s]
+ 51%|#####     | 21.1M/41.5M [05:12&lt;05:02, 70.6kB/s]
+ 51%|#####1    | 21.2M/41.5M [05:12&lt;04:10, 84.9kB/s]
+ 51%|#####1    | 21.2M/41.5M [05:13&lt;04:04, 87.2kB/s]
+ 51%|#####1    | 21.2M/41.5M [05:13&lt;03:57, 89.4kB/s]
+ 51%|#####1    | 21.2M/41.5M [05:13&lt;03:55, 90.4kB/s]
+ 51%|#####1    | 21.2M/41.5M [05:13&lt;03:37, 97.8kB/s]
+ 51%|#####1    | 21.3M/41.5M [05:14&lt;04:25, 79.9kB/s]
+ 51%|#####1    | 21.3M/41.5M [05:14&lt;03:21, 105kB/s]
+ 51%|#####1    | 21.3M/41.5M [05:14&lt;03:27, 102kB/s]
+ 51%|#####1    | 21.3M/41.5M [05:14&lt;03:31, 100kB/s]
+ 51%|#####1    | 21.3M/41.5M [05:14&lt;03:35, 98.1kB/s]
+ 51%|#####1    | 21.4M/41.5M [05:14&lt;03:24, 103kB/s]
+ 52%|#####1    | 21.4M/41.5M [05:15&lt;03:30, 100kB/s]
+ 52%|#####1    | 21.4M/41.5M [05:15&lt;03:08, 112kB/s]
+ 52%|#####1    | 21.4M/41.5M [05:15&lt;03:18, 106kB/s]
+ 52%|#####1    | 21.4M/41.5M [05:15&lt;03:01, 116kB/s]
+ 52%|#####1    | 21.4M/41.5M [05:15&lt;03:12, 109kB/s]
+ 52%|#####1    | 21.5M/41.5M [05:15&lt;03:21, 104kB/s]
+ 52%|#####1    | 21.5M/41.5M [05:16&lt;03:02, 115kB/s]
+ 52%|#####1    | 21.5M/41.5M [05:16&lt;02:51, 122kB/s]
+ 52%|#####1    | 21.5M/41.5M [05:16&lt;03:31, 98.8kB/s]
+ 52%|#####1    | 21.5M/41.5M [05:16&lt;02:57, 118kB/s]
+ 52%|#####1    | 21.6M/41.5M [05:17&lt;04:05, 85.3kB/s]
+ 52%|#####2    | 21.6M/41.5M [05:17&lt;03:46, 92.3kB/s]
+ 52%|#####2    | 21.6M/41.5M [05:17&lt;05:00, 69.3kB/s]
+ 52%|#####2    | 21.6M/41.5M [05:17&lt;04:39, 74.6kB/s]
+ 52%|#####2    | 21.6M/41.5M [05:17&lt;04:42, 73.8kB/s]
+ 52%|#####2    | 21.6M/41.5M [05:18&lt;05:30, 63.0kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:18&lt;04:58, 69.6kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:18&lt;05:24, 64.0kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:19&lt;07:18, 47.4kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:19&lt;05:05, 68.0kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:19&lt;06:51, 50.5kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:19&lt;06:58, 49.6kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:20&lt;08:45, 39.5kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:20&lt;06:46, 50.9kB/s]
+ 52%|#####2    | 21.7M/41.5M [05:20&lt;06:55, 49.9kB/s]
+ 52%|#####2    | 21.8M/41.5M [05:20&lt;07:02, 49.0kB/s]
+ 52%|#####2    | 21.8M/41.5M [05:20&lt;05:41, 60.5kB/s]
+ 52%|#####2    | 21.8M/41.5M [05:20&lt;06:04, 56.7kB/s]
+ 53%|#####2    | 21.8M/41.5M [05:21&lt;05:29, 62.7kB/s]
+ 53%|#####2    | 21.8M/41.5M [05:21&lt;05:31, 62.2kB/s]
+ 53%|#####2    | 21.8M/41.5M [05:21&lt;05:58, 57.6kB/s]
+ 53%|#####2    | 21.8M/41.5M [05:21&lt;05:24, 63.6kB/s]
+ 53%|#####2    | 21.8M/41.5M [05:21&lt;05:27, 62.9kB/s]
+ 53%|#####2    | 21.8M/41.5M [05:22&lt;04:46, 71.9kB/s]
+ 53%|#####2    | 21.9M/41.5M [05:22&lt;05:20, 64.2kB/s]
+ 53%|#####2    | 21.9M/41.5M [05:22&lt;04:42, 72.8kB/s]
+ 53%|#####2    | 21.9M/41.5M [05:22&lt;04:20, 78.8kB/s]
+ 53%|#####2    | 21.9M/41.5M [05:22&lt;04:07, 83.0kB/s]
+ 53%|#####2    | 21.9M/41.5M [05:22&lt;03:58, 85.9kB/s]
+ 53%|#####2    | 21.9M/41.5M [05:23&lt;03:53, 88.0kB/s]
+ 53%|#####2    | 22.0M/41.5M [05:23&lt;03:18, 103kB/s]
+ 53%|#####2    | 22.0M/41.5M [05:23&lt;03:24, 100kB/s]
+ 53%|#####3    | 22.0M/41.5M [05:23&lt;03:02, 112kB/s]
+ 53%|#####3    | 22.0M/41.5M [05:23&lt;03:12, 106kB/s]
+ 53%|#####3    | 22.0M/41.5M [05:24&lt;04:19, 78.6kB/s]
+ 53%|#####3    | 22.1M/41.5M [05:24&lt;02:54, 117kB/s]
+ 53%|#####3    | 22.1M/41.5M [05:24&lt;03:04, 110kB/s]
+ 53%|#####3    | 22.1M/41.5M [05:24&lt;03:12, 106kB/s]
+ 53%|#####3    | 22.1M/41.5M [05:24&lt;02:56, 115kB/s]
+ 53%|#####3    | 22.1M/41.5M [05:25&lt;03:06, 109kB/s]
+ 53%|#####3    | 22.2M/41.5M [05:25&lt;03:42, 91.1kB/s]
+ 53%|#####3    | 22.2M/41.5M [05:25&lt;02:57, 114kB/s]
+ 54%|#####3    | 22.2M/41.5M [05:25&lt;03:06, 109kB/s]
+ 54%|#####3    | 22.2M/41.5M [05:25&lt;03:04, 110kB/s]
+ 54%|#####3    | 22.2M/41.5M [05:26&lt;03:49, 88.1kB/s]
+ 54%|#####3    | 22.3M/41.5M [05:26&lt;02:53, 116kB/s]
+ 54%|#####3    | 22.3M/41.5M [05:26&lt;03:03, 110kB/s]
+ 54%|#####3    | 22.3M/41.5M [05:26&lt;02:53, 116kB/s]
+ 54%|#####3    | 22.3M/41.5M [05:26&lt;03:08, 106kB/s]
+ 54%|#####3    | 22.3M/41.5M [05:27&lt;03:05, 108kB/s]
+ 54%|#####3    | 22.4M/41.5M [05:27&lt;02:51, 117kB/s]
+ 54%|#####3    | 22.4M/41.5M [05:27&lt;02:44, 122kB/s]
+ 54%|#####4    | 22.4M/41.5M [05:27&lt;02:53, 115kB/s]
+ 54%|#####4    | 22.4M/41.5M [05:27&lt;02:43, 122kB/s]
+ 54%|#####4    | 22.5M/41.5M [05:28&lt;02:37, 127kB/s]
+ 54%|#####4    | 22.5M/41.5M [05:28&lt;02:50, 117kB/s]
+ 54%|#####4    | 22.5M/41.5M [05:28&lt;02:41, 124kB/s]
+ 54%|#####4    | 22.5M/41.5M [05:28&lt;03:41, 89.8kB/s]
+ 54%|#####4    | 22.5M/41.5M [05:28&lt;02:48, 118kB/s]
+ 54%|#####4    | 22.6M/41.5M [05:29&lt;02:58, 111kB/s]
+ 54%|#####4    | 22.6M/41.5M [05:29&lt;03:06, 106kB/s]
+ 54%|#####4    | 22.6M/41.5M [05:29&lt;03:12, 103kB/s]
+ 54%|#####4    | 22.6M/41.5M [05:29&lt;03:18, 99.9kB/s]
+ 55%|#####4    | 22.6M/41.5M [05:29&lt;04:20, 76.0kB/s]
+ 55%|#####4    | 22.6M/41.5M [05:30&lt;04:06, 80.1kB/s]
+ 55%|#####4    | 22.7M/41.5M [05:30&lt;04:14, 77.5kB/s]
+ 55%|#####4    | 22.7M/41.5M [05:30&lt;04:45, 69.0kB/s]
+ 55%|#####4    | 22.7M/41.5M [05:30&lt;04:24, 74.6kB/s]
+ 55%|#####4    | 22.7M/41.5M [05:31&lt;05:11, 63.3kB/s]
+ 55%|#####4    | 22.7M/41.5M [05:31&lt;04:34, 71.7kB/s]
+ 55%|#####4    | 22.7M/41.5M [05:31&lt;04:44, 69.2kB/s]
+ 55%|#####4    | 22.8M/41.5M [05:31&lt;04:23, 74.4kB/s]
+ 55%|#####4    | 22.8M/41.5M [05:32&lt;04:08, 78.9kB/s]
+ 55%|#####4    | 22.8M/41.5M [05:32&lt;03:57, 82.4kB/s]
+ 55%|#####4    | 22.8M/41.5M [05:32&lt;03:50, 85.1kB/s]
+ 55%|#####5    | 22.8M/41.5M [05:32&lt;04:17, 76.1kB/s]
+ 55%|#####5    | 22.8M/41.5M [05:32&lt;04:03, 80.4kB/s]
+ 55%|#####5    | 22.9M/41.5M [05:33&lt;03:53, 83.7kB/s]
+ 55%|#####5    | 22.9M/41.5M [05:33&lt;03:46, 86.2kB/s]
+ 55%|#####5    | 22.9M/41.5M [05:33&lt;04:44, 68.5kB/s]
+ 55%|#####5    | 22.9M/41.5M [05:33&lt;03:48, 85.4kB/s]
+ 55%|#####5    | 22.9M/41.5M [05:33&lt;03:43, 87.3kB/s]
+ 55%|#####5    | 22.9M/41.5M [05:34&lt;03:39, 88.8kB/s]
+ 55%|#####5    | 23.0M/41.5M [05:34&lt;03:36, 89.9kB/s]
+ 55%|#####5    | 23.0M/41.5M [05:34&lt;03:34, 90.7kB/s]
+ 55%|#####5    | 23.0M/41.5M [05:34&lt;03:32, 91.3kB/s]
+ 55%|#####5    | 23.0M/41.5M [05:34&lt;03:28, 92.7kB/s]
+ 55%|#####5    | 23.0M/41.5M [05:35&lt;03:28, 92.8kB/s]
+ 56%|#####5    | 23.0M/41.5M [05:35&lt;03:28, 92.7kB/s]
+ 56%|#####5    | 23.1M/41.5M [05:35&lt;03:02, 106kB/s]
+ 56%|#####5    | 23.1M/41.5M [05:35&lt;03:09, 102kB/s]
+ 56%|#####5    | 23.1M/41.5M [05:35&lt;03:14, 99.4kB/s]
+ 56%|#####5    | 23.1M/41.5M [05:36&lt;03:44, 85.8kB/s]
+ 56%|#####5    | 23.1M/41.5M [05:36&lt;02:54, 110kB/s]
+ 56%|#####5    | 23.2M/41.5M [05:36&lt;03:01, 106kB/s]
+ 56%|#####5    | 23.2M/41.5M [05:36&lt;03:08, 102kB/s]
+ 56%|#####5    | 23.2M/41.5M [05:36&lt;03:12, 99.5kB/s]
+ 56%|#####5    | 23.2M/41.5M [05:37&lt;03:16, 97.5kB/s]
+ 56%|#####5    | 23.2M/41.5M [05:37&lt;03:19, 96.0kB/s]
+ 56%|#####6    | 23.2M/41.5M [05:37&lt;03:21, 95.0kB/s]
+ 56%|#####6    | 23.3M/41.5M [05:37&lt;03:37, 88.1kB/s]
+ 56%|#####6    | 23.3M/41.5M [05:37&lt;03:33, 89.4kB/s]
+ 56%|#####6    | 23.3M/41.5M [05:37&lt;03:31, 90.3kB/s]
+ 56%|#####6    | 23.3M/41.5M [05:38&lt;03:29, 91.0kB/s]
+ 56%|#####6    | 23.3M/41.5M [05:38&lt;03:28, 91.6kB/s]
+ 56%|#####6    | 23.3M/41.5M [05:38&lt;03:27, 91.9kB/s]
+ 56%|#####6    | 23.4M/41.5M [05:38&lt;03:11, 99.4kB/s]
+ 56%|#####6    | 23.4M/41.5M [05:38&lt;03:15, 97.3kB/s]
+ 56%|#####6    | 23.4M/41.5M [05:38&lt;02:52, 110kB/s]
+ 56%|#####6    | 23.4M/41.5M [05:39&lt;02:39, 119kB/s]
+ 56%|#####6    | 23.4M/41.5M [05:39&lt;02:50, 111kB/s]
+ 57%|#####6    | 23.5M/41.5M [05:39&lt;02:38, 120kB/s]
+ 57%|#####6    | 23.5M/41.5M [05:39&lt;02:30, 126kB/s]
+ 57%|#####6    | 23.5M/41.5M [05:39&lt;02:25, 130kB/s]
+ 57%|#####6    | 23.5M/41.5M [05:40&lt;02:22, 132kB/s]
+ 57%|#####6    | 23.5M/41.5M [05:40&lt;03:02, 103kB/s]
+ 57%|#####6    | 23.6M/41.5M [05:40&lt;02:18, 135kB/s]
+ 57%|#####6    | 23.6M/41.5M [05:40&lt;02:25, 129kB/s]
+ 57%|#####6    | 23.6M/41.5M [05:40&lt;02:23, 130kB/s]
+ 57%|#####6    | 23.6M/41.5M [05:40&lt;02:23, 131kB/s]
+ 57%|#####6    | 23.6M/41.5M [05:41&lt;02:25, 129kB/s]
+ 57%|#####7    | 23.7M/41.5M [05:41&lt;02:21, 132kB/s]
+ 57%|#####7    | 23.7M/41.5M [05:41&lt;02:12, 141kB/s]
+ 57%|#####7    | 23.7M/41.5M [05:41&lt;02:13, 140kB/s]
+ 57%|#####7    | 23.7M/41.5M [05:41&lt;02:13, 139kB/s]
+ 57%|#####7    | 23.8M/41.5M [05:41&lt;02:13, 139kB/s]
+ 57%|#####7    | 23.8M/41.5M [05:42&lt;03:02, 102kB/s]
+ 57%|#####7    | 23.8M/41.5M [05:42&lt;02:01, 153kB/s]
+ 57%|#####7    | 23.8M/41.5M [05:42&lt;02:09, 143kB/s]
+ 58%|#####7    | 23.9M/41.5M [05:42&lt;02:29, 123kB/s]
+ 58%|#####7    | 23.9M/41.5M [05:43&lt;02:34, 119kB/s]
+ 58%|#####7    | 23.9M/41.5M [05:43&lt;02:28, 124kB/s]
+ 58%|#####7    | 23.9M/41.5M [05:43&lt;03:02, 101kB/s]
+ 58%|#####7    | 24.0M/41.5M [05:43&lt;02:11, 140kB/s]
+ 58%|#####7    | 24.0M/41.5M [05:43&lt;02:20, 131kB/s]
+ 58%|#####7    | 24.0M/41.5M [05:44&lt;02:17, 133kB/s]
+ 58%|#####7    | 24.0M/41.5M [05:44&lt;02:19, 131kB/s]
+ 58%|#####7    | 24.1M/41.5M [05:44&lt;02:05, 145kB/s]
+ 58%|#####8    | 24.1M/41.5M [05:44&lt;02:11, 139kB/s]
+ 58%|#####8    | 24.1M/41.5M [05:44&lt;02:15, 134kB/s]
+ 58%|#####8    | 24.1M/41.5M [05:44&lt;02:14, 136kB/s]
+ 58%|#####8    | 24.1M/41.5M [05:44&lt;02:01, 150kB/s]
+ 58%|#####8    | 24.2M/41.5M [05:45&lt;01:53, 161kB/s]
+ 58%|#####8    | 24.2M/41.5M [05:45&lt;01:58, 153kB/s]
+ 58%|#####8    | 24.2M/41.5M [05:45&lt;02:05, 144kB/s]
+ 58%|#####8    | 24.2M/41.5M [05:45&lt;02:11, 138kB/s]
+ 58%|#####8    | 24.2M/41.5M [05:45&lt;02:10, 138kB/s]
+ 58%|#####8    | 24.3M/41.5M [05:45&lt;01:59, 152kB/s]
+ 59%|#####8    | 24.3M/41.5M [05:45&lt;01:51, 162kB/s]
+ 59%|#####8    | 24.3M/41.5M [05:46&lt;01:56, 154kB/s]
+ 59%|#####8    | 24.3M/41.5M [05:46&lt;01:51, 162kB/s]
+ 59%|#####8    | 24.4M/41.5M [05:46&lt;01:56, 154kB/s]
+ 59%|#####8    | 24.4M/41.5M [05:46&lt;01:51, 162kB/s]
+ 59%|#####8    | 24.4M/41.5M [05:46&lt;01:46, 169kB/s]
+ 59%|#####8    | 24.4M/41.5M [05:46&lt;01:42, 175kB/s]
+ 59%|#####8    | 24.4M/41.5M [05:46&lt;01:41, 177kB/s]
+ 59%|#####8    | 24.5M/41.5M [05:47&lt;01:43, 172kB/s]
+ 59%|#####9    | 24.5M/41.5M [05:47&lt;01:47, 166kB/s]
+ 59%|#####9    | 24.5M/41.5M [05:47&lt;02:17, 129kB/s]
+ 59%|#####9    | 24.6M/41.5M [05:47&lt;01:38, 181kB/s]
+ 59%|#####9    | 24.6M/41.5M [05:47&lt;01:44, 169kB/s]
+ 59%|#####9    | 24.6M/41.5M [05:48&lt;01:49, 162kB/s]
+ 59%|#####9    | 24.7M/41.5M [05:48&lt;01:44, 169kB/s]
+ 59%|#####9    | 24.7M/41.5M [05:48&lt;01:41, 174kB/s]
+ 60%|#####9    | 24.7M/41.5M [05:48&lt;02:21, 124kB/s]
+ 60%|#####9    | 24.7M/41.5M [05:48&lt;01:55, 152kB/s]
+ 60%|#####9    | 24.8M/41.5M [05:49&lt;01:42, 171kB/s]
+ 60%|#####9    | 24.8M/41.5M [05:49&lt;01:48, 162kB/s]
+ 60%|#####9    | 24.8M/41.5M [05:49&lt;01:52, 155kB/s]
+ 60%|#####9    | 24.8M/41.5M [05:49&lt;02:14, 130kB/s]
+ 60%|#####9    | 24.9M/41.5M [05:49&lt;01:57, 148kB/s]
+ 60%|#####9    | 24.9M/41.5M [05:50&lt;02:39, 109kB/s]
+ 60%|######    | 24.9M/41.5M [05:50&lt;02:23, 121kB/s]
+ 60%|######    | 24.9M/41.5M [05:50&lt;02:28, 117kB/s]
+ 60%|######    | 25.0M/41.5M [05:50&lt;02:37, 110kB/s]
+ 60%|######    | 25.0M/41.5M [05:50&lt;02:44, 106kB/s]
+ 60%|######    | 25.0M/41.5M [05:51&lt;02:49, 102kB/s]
+ 60%|######    | 25.0M/41.5M [05:51&lt;02:37, 109kB/s]
+ 60%|######    | 25.0M/41.5M [05:51&lt;02:40, 108kB/s]
+ 60%|######    | 25.0M/41.5M [05:51&lt;02:31, 114kB/s]
+ 60%|######    | 25.1M/41.5M [05:51&lt;02:35, 111kB/s]
+ 60%|######    | 25.1M/41.5M [05:52&lt;02:28, 116kB/s]
+ 61%|######    | 25.1M/41.5M [05:52&lt;02:20, 123kB/s]
+ 61%|######    | 25.1M/41.5M [05:52&lt;02:26, 117kB/s]
+ 61%|######    | 25.1M/41.5M [05:52&lt;02:18, 124kB/s]
+ 61%|######    | 25.2M/41.5M [05:52&lt;02:13, 129kB/s]
+ 61%|######    | 25.2M/41.5M [05:52&lt;02:09, 132kB/s]
+ 61%|######    | 25.2M/41.5M [05:53&lt;02:00, 141kB/s]
+ 61%|######    | 25.2M/41.5M [05:53&lt;02:01, 141kB/s]
+ 61%|######    | 25.3M/41.5M [05:53&lt;01:59, 142kB/s]
+ 61%|######    | 25.3M/41.5M [05:53&lt;02:00, 141kB/s]
+ 61%|######1   | 25.3M/41.5M [05:53&lt;01:49, 155kB/s]
+ 61%|######1   | 25.4M/41.5M [05:53&lt;01:43, 164kB/s]
+ 61%|######1   | 25.4M/41.5M [05:54&lt;01:47, 156kB/s]
+ 61%|######1   | 25.4M/41.5M [05:54&lt;01:42, 165kB/s]
+ 61%|######1   | 25.4M/41.5M [05:54&lt;01:38, 171kB/s]
+ 61%|######1   | 25.5M/41.5M [05:54&lt;01:35, 176kB/s]
+ 61%|######1   | 25.5M/41.5M [05:54&lt;01:33, 179kB/s]
+ 62%|######1   | 25.5M/41.5M [05:55&lt;01:32, 181kB/s]
+ 62%|######1   | 25.6M/41.5M [05:55&lt;01:25, 196kB/s]
+ 62%|######1   | 25.6M/41.5M [05:55&lt;01:20, 207kB/s]
+ 62%|######1   | 25.7M/41.5M [05:55&lt;01:12, 228kB/s]
+ 62%|######1   | 25.7M/41.5M [05:55&lt;01:08, 243kB/s]
+ 62%|######2   | 25.8M/41.5M [05:55&lt;01:01, 268kB/s]
+ 62%|######2   | 25.8M/41.5M [05:56&lt;00:57, 285kB/s]
+ 62%|######2   | 25.9M/41.5M [05:56&lt;00:52, 311kB/s]
+ 63%|######2   | 26.0M/41.5M [05:56&lt;00:45, 356kB/s]
+ 63%|######2   | 26.0M/41.5M [05:56&lt;00:53, 304kB/s]
+ 63%|######2   | 26.1M/41.5M [05:56&lt;00:39, 411kB/s]
+ 63%|######3   | 26.2M/41.5M [05:56&lt;00:40, 399kB/s]
+ 63%|######3   | 26.2M/41.5M [05:57&lt;00:42, 376kB/s]
+ 63%|######3   | 26.3M/41.5M [05:57&lt;00:53, 298kB/s]
+ 64%|######3   | 26.4M/41.5M [05:57&lt;00:41, 386kB/s]
+ 64%|######3   | 26.4M/41.5M [05:57&lt;00:44, 358kB/s]
+ 64%|######3   | 26.5M/41.5M [05:58&lt;00:57, 274kB/s]
+ 64%|######4   | 26.6M/41.5M [05:58&lt;00:47, 329kB/s]
+ 64%|######4   | 26.6M/41.5M [05:58&lt;00:51, 305kB/s]
+ 64%|######4   | 26.7M/41.5M [05:58&lt;00:52, 298kB/s]
+ 64%|######4   | 26.7M/41.5M [05:58&lt;00:55, 280kB/s]
+ 64%|######4   | 26.8M/41.5M [05:59&lt;00:58, 266kB/s]
+ 65%|######4   | 26.8M/41.5M [05:59&lt;00:57, 270kB/s]
+ 65%|######4   | 26.8M/41.5M [05:59&lt;00:56, 272kB/s]
+ 65%|######4   | 26.9M/41.5M [05:59&lt;00:58, 260kB/s]
+ 65%|######4   | 26.9M/41.5M [05:59&lt;00:57, 266kB/s]
+ 65%|######5   | 27.0M/41.5M [05:59&lt;00:56, 269kB/s]
+ 65%|######5   | 27.0M/41.5M [06:00&lt;00:55, 272kB/s]
+ 65%|######5   | 27.1M/41.5M [06:00&lt;00:58, 260kB/s]
+ 65%|######5   | 27.1M/41.5M [06:00&lt;00:56, 266kB/s]
+ 65%|######5   | 27.2M/41.5M [06:00&lt;00:55, 269kB/s]
+ 66%|######5   | 27.2M/41.5M [06:00&lt;00:55, 272kB/s]
+ 66%|######5   | 27.2M/41.5M [06:01&lt;00:54, 274kB/s]
+ 66%|######5   | 27.3M/41.5M [06:01&lt;00:51, 289kB/s]
+ 66%|######5   | 27.4M/41.5M [06:01&lt;00:51, 286kB/s]
+ 66%|######6   | 27.4M/41.5M [06:01&lt;00:52, 284kB/s]
+ 66%|######6   | 27.5M/41.5M [06:01&lt;00:49, 296kB/s]
+ 66%|######6   | 27.5M/41.5M [06:01&lt;00:50, 291kB/s]
+ 66%|######6   | 27.6M/41.5M [06:02&lt;00:48, 301kB/s]
+ 67%|######6   | 27.6M/41.5M [06:02&lt;00:47, 308kB/s]
+ 67%|######6   | 27.7M/41.5M [06:02&lt;00:48, 299kB/s]
+ 67%|######6   | 27.7M/41.5M [06:02&lt;00:47, 307kB/s]
+ 67%|######6   | 27.8M/41.5M [06:02&lt;00:46, 312kB/s]
+ 67%|######7   | 27.8M/41.5M [06:02&lt;00:43, 330kB/s]
+ 67%|######7   | 27.9M/41.5M [06:03&lt;00:43, 328kB/s]
+ 67%|######7   | 27.9M/41.5M [06:03&lt;00:43, 327kB/s]
+ 67%|######7   | 28.0M/41.5M [06:03&lt;00:41, 340kB/s]
+ 68%|######7   | 28.1M/41.5M [06:03&lt;00:41, 336kB/s]
+ 68%|######7   | 28.1M/41.5M [06:03&lt;00:40, 346kB/s]
+ 68%|######7   | 28.2M/41.5M [06:04&lt;00:39, 354kB/s]
+ 68%|######8   | 28.2M/41.5M [06:04&lt;00:38, 359kB/s]
+ 68%|######8   | 28.3M/41.5M [06:04&lt;00:35, 390kB/s]
+ 68%|######8   | 28.4M/41.5M [06:04&lt;00:34, 399kB/s]
+ 69%|######8   | 28.5M/41.5M [06:04&lt;00:32, 418kB/s]
+ 69%|######8   | 28.6M/41.5M [06:04&lt;00:30, 446kB/s]
+ 69%|######9   | 28.6M/41.5M [06:05&lt;00:28, 465kB/s]
+ 69%|######9   | 28.7M/41.5M [06:05&lt;00:27, 492kB/s]
+ 70%|######9   | 28.8M/41.5M [06:05&lt;00:25, 526kB/s]
+ 70%|######9   | 28.9M/41.5M [06:05&lt;00:23, 563kB/s]
+ 70%|#######   | 29.1M/41.5M [06:05&lt;00:21, 602kB/s]
+ 70%|#######   | 29.2M/41.5M [06:05&lt;00:20, 631kB/s]
+ 71%|#######   | 29.3M/41.5M [06:06&lt;00:18, 678kB/s]
+ 71%|#######   | 29.4M/41.5M [06:06&lt;00:16, 756kB/s]
+ 71%|#######1  | 29.6M/41.5M [06:06&lt;00:15, 829kB/s]
+ 72%|#######1  | 29.7M/41.5M [06:06&lt;00:14, 854kB/s]
+ 72%|#######1  | 29.8M/41.5M [06:06&lt;00:14, 824kB/s]
+ 72%|#######2  | 29.9M/41.5M [06:06&lt;00:14, 856kB/s]
+ 72%|#######2  | 30.1M/41.5M [06:06&lt;00:12, 962kB/s]
+ 73%|#######2  | 30.2M/41.5M [06:07&lt;00:16, 706kB/s]
+ 73%|#######3  | 30.4M/41.5M [06:07&lt;00:11, 1.00MB/s]
+ 74%|#######3  | 30.5M/41.5M [06:07&lt;00:11, 972kB/s]
+ 74%|#######3  | 30.7M/41.5M [06:07&lt;00:11, 1.00MB/s]
+ 74%|#######4  | 30.8M/41.5M [06:07&lt;00:16, 703kB/s]
+ 75%|#######4  | 30.9M/41.5M [06:08&lt;00:13, 819kB/s]
+ 75%|#######4  | 31.1M/41.5M [06:08&lt;00:14, 770kB/s]
+ 75%|#######5  | 31.2M/41.5M [06:08&lt;00:14, 735kB/s]
+ 75%|#######5  | 31.3M/41.5M [06:08&lt;00:14, 756kB/s]
+ 76%|#######5  | 31.4M/41.5M [06:08&lt;00:13, 783kB/s]
+ 76%|#######5  | 31.5M/41.5M [06:08&lt;00:17, 591kB/s]
+ 76%|#######6  | 31.6M/41.5M [06:09&lt;00:14, 715kB/s]
+ 76%|#######6  | 31.7M/41.5M [06:09&lt;00:14, 693kB/s]
+ 77%|#######6  | 31.8M/41.5M [06:09&lt;00:16, 635kB/s]
+ 77%|#######6  | 31.9M/41.5M [06:09&lt;00:16, 610kB/s]
+ 77%|#######7  | 32.0M/41.5M [06:09&lt;00:16, 594kB/s]
+ 77%|#######7  | 32.1M/41.5M [06:09&lt;00:15, 627kB/s]
+ 77%|#######7  | 32.1M/41.5M [06:10&lt;00:16, 605kB/s]
+ 78%|#######7  | 32.2M/41.5M [06:10&lt;00:18, 527kB/s]
+ 78%|#######7  | 32.3M/41.5M [06:10&lt;00:17, 557kB/s]
+ 78%|#######7  | 32.4M/41.5M [06:10&lt;00:16, 592kB/s]
+ 78%|#######8  | 32.4M/41.5M [06:10&lt;00:15, 630kB/s]
+ 78%|#######8  | 32.5M/41.5M [06:10&lt;00:15, 606kB/s]
+ 79%|#######8  | 32.6M/41.5M [06:10&lt;00:16, 562kB/s]
+ 79%|#######8  | 32.7M/41.5M [06:11&lt;00:15, 598kB/s]
+ 79%|#######8  | 32.8M/41.5M [06:11&lt;00:14, 637kB/s]
+ 79%|#######9  | 32.8M/41.5M [06:11&lt;00:14, 609kB/s]
+ 79%|#######9  | 32.9M/41.5M [06:11&lt;00:16, 563kB/s]
+ 79%|#######9  | 33.0M/41.5M [06:11&lt;00:14, 600kB/s]
+ 80%|#######9  | 33.1M/41.5M [06:11&lt;00:13, 639kB/s]
+ 80%|#######9  | 33.1M/41.5M [06:11&lt;00:14, 609kB/s]
+ 80%|#######9  | 33.2M/41.5M [06:11&lt;00:15, 563kB/s]
+ 80%|########  | 33.3M/41.5M [06:12&lt;00:14, 600kB/s]
+ 80%|########  | 33.4M/41.5M [06:12&lt;00:13, 640kB/s]
+ 81%|########  | 33.4M/41.5M [06:12&lt;00:13, 609kB/s]
+ 81%|########  | 33.5M/41.5M [06:12&lt;00:14, 563kB/s]
+ 81%|########  | 33.6M/41.5M [06:12&lt;00:13, 618kB/s]
+ 81%|########1 | 33.7M/41.5M [06:12&lt;00:12, 652kB/s]
+ 81%|########1 | 33.7M/41.5M [06:12&lt;00:13, 619kB/s]
+ 81%|########1 | 33.8M/41.5M [06:13&lt;00:14, 569kB/s]
+ 82%|########1 | 33.9M/41.5M [06:13&lt;00:13, 605kB/s]
+ 82%|########1 | 34.0M/41.5M [06:13&lt;00:12, 642kB/s]
+ 82%|########2 | 34.0M/41.5M [06:13&lt;00:12, 612kB/s]
+ 82%|########2 | 34.1M/41.5M [06:13&lt;00:13, 565kB/s]
+ 82%|########2 | 34.2M/41.5M [06:13&lt;00:12, 601kB/s]
+ 83%|########2 | 34.3M/41.5M [06:13&lt;00:11, 641kB/s]
+ 83%|########2 | 34.3M/41.5M [06:13&lt;00:12, 610kB/s]
+ 83%|########2 | 34.4M/41.5M [06:14&lt;00:12, 582kB/s]
+ 83%|########3 | 34.5M/41.5M [06:14&lt;00:11, 628kB/s]
+ 83%|########3 | 34.6M/41.5M [06:14&lt;00:10, 663kB/s]
+ 84%|########3 | 34.7M/41.5M [06:14&lt;00:11, 645kB/s]
+ 84%|########3 | 34.7M/41.5M [06:14&lt;00:12, 590kB/s]
+ 84%|########3 | 34.8M/41.5M [06:14&lt;00:11, 635kB/s]
+ 84%|########4 | 34.9M/41.5M [06:14&lt;00:10, 640kB/s]
+ 84%|########4 | 35.0M/41.5M [06:15&lt;00:09, 701kB/s]
+ 85%|########4 | 35.1M/41.5M [06:15&lt;00:09, 673kB/s]
+ 85%|########4 | 35.2M/41.5M [06:15&lt;00:10, 629kB/s]
+ 85%|########5 | 35.3M/41.5M [06:15&lt;00:09, 680kB/s]
+ 85%|########5 | 35.4M/41.5M [06:15&lt;00:12, 502kB/s]
+ 86%|########5 | 35.5M/41.5M [06:15&lt;00:09, 688kB/s]
+ 86%|########5 | 35.6M/41.5M [06:15&lt;00:09, 672kB/s]
+ 86%|########6 | 35.7M/41.5M [06:16&lt;00:09, 672kB/s]
+ 86%|########6 | 35.8M/41.5M [06:16&lt;00:09, 652kB/s]
+ 86%|########6 | 35.8M/41.5M [06:16&lt;00:10, 570kB/s]
+ 87%|########6 | 35.9M/41.5M [06:16&lt;00:10, 577kB/s]
+ 87%|########6 | 36.0M/41.5M [06:16&lt;00:09, 637kB/s]
+ 87%|########7 | 36.1M/41.5M [06:16&lt;00:08, 665kB/s]
+ 87%|########7 | 36.2M/41.5M [06:16&lt;00:08, 646kB/s]
+ 87%|########7 | 36.2M/41.5M [06:17&lt;00:09, 593kB/s]
+ 88%|########7 | 36.3M/41.5M [06:17&lt;00:08, 615kB/s]
+ 88%|########7 | 36.5M/41.5M [06:17&lt;00:08, 642kB/s]
+ 88%|########8 | 36.6M/41.5M [06:17&lt;00:07, 660kB/s]
+ 88%|########8 | 36.7M/41.5M [06:17&lt;00:07, 671kB/s]
+ 89%|########8 | 36.8M/41.5M [06:17&lt;00:06, 733kB/s]
+ 89%|########8 | 36.9M/41.5M [06:18&lt;00:08, 560kB/s]
+ 89%|########9 | 37.0M/41.5M [06:18&lt;00:06, 699kB/s]
+ 89%|########9 | 37.1M/41.5M [06:18&lt;00:07, 641kB/s]
+ 90%|########9 | 37.2M/41.5M [06:18&lt;00:07, 613kB/s]
+ 90%|########9 | 37.3M/41.5M [06:18&lt;00:07, 584kB/s]
+ 90%|######### | 37.4M/41.5M [06:19&lt;00:07, 576kB/s]
+ 90%|######### | 37.5M/41.5M [06:19&lt;00:07, 584kB/s]
+ 91%|######### | 37.6M/41.5M [06:19&lt;00:07, 576kB/s]
+ 91%|######### | 37.7M/41.5M [06:19&lt;00:06, 584kB/s]
+ 91%|#########1| 37.8M/41.5M [06:19&lt;00:06, 590kB/s]
+ 91%|#########1| 37.9M/41.5M [06:19&lt;00:06, 594kB/s]
+ 92%|#########1| 38.0M/41.5M [06:20&lt;00:06, 596kB/s]
+ 92%|#########1| 38.1M/41.5M [06:20&lt;00:07, 494kB/s]
+ 92%|#########2| 38.2M/41.5M [06:20&lt;00:05, 616kB/s]
+ 92%|#########2| 38.3M/41.5M [06:20&lt;00:06, 558kB/s]
+ 92%|#########2| 38.3M/41.5M [06:20&lt;00:06, 530kB/s]
+ 93%|#########2| 38.4M/41.5M [06:20&lt;00:06, 521kB/s]
+ 93%|#########2| 38.5M/41.5M [06:21&lt;00:05, 520kB/s]
+ 93%|#########3| 38.6M/41.5M [06:21&lt;00:05, 517kB/s]
+ 93%|#########3| 38.7M/41.5M [06:21&lt;00:05, 529kB/s]
+ 93%|#########3| 38.8M/41.5M [06:21&lt;00:05, 537kB/s]
+ 94%|#########3| 38.9M/41.5M [06:21&lt;00:05, 543kB/s]
+ 94%|#########3| 39.0M/41.5M [06:22&lt;00:04, 547kB/s]
+ 94%|#########4| 39.1M/41.5M [06:22&lt;00:04, 550kB/s]
+ 94%|#########4| 39.2M/41.5M [06:22&lt;00:04, 566kB/s]
+ 95%|#########4| 39.3M/41.5M [06:22&lt;00:04, 577kB/s]
+ 95%|#########4| 39.4M/41.5M [06:22&lt;00:03, 585kB/s]
+ 95%|#########5| 39.5M/41.5M [06:22&lt;00:03, 590kB/s]
+ 95%|#########5| 39.6M/41.5M [06:23&lt;00:03, 594kB/s]
+ 96%|#########5| 39.7M/41.5M [06:23&lt;00:03, 611kB/s]
+ 96%|#########5| 39.8M/41.5M [06:23&lt;00:02, 608kB/s]
+ 96%|#########6| 39.9M/41.5M [06:23&lt;00:02, 607kB/s]
+ 96%|#########6| 40.0M/41.5M [06:23&lt;00:02, 606kB/s]
+ 97%|#########6| 40.1M/41.5M [06:23&lt;00:02, 619kB/s]
+ 97%|#########6| 40.2M/41.5M [06:24&lt;00:02, 614kB/s]
+ 97%|#########7| 40.3M/41.5M [06:24&lt;00:02, 611kB/s]
+ 97%|#########7| 40.4M/41.5M [06:24&lt;00:01, 608kB/s]
+ 98%|#########7| 40.5M/41.5M [06:24&lt;00:01, 607kB/s]
+ 98%|#########7| 40.6M/41.5M [06:24&lt;00:01, 620kB/s]
+ 98%|#########8| 40.7M/41.5M [06:25&lt;00:01, 615kB/s]
+ 98%|#########8| 40.8M/41.5M [06:25&lt;00:01, 611kB/s]
+ 99%|#########8| 40.9M/41.5M [06:25&lt;00:00, 623kB/s]
+ 99%|#########8| 41.0M/41.5M [06:25&lt;00:00, 617kB/s]
+ 99%|#########9| 41.1M/41.5M [06:25&lt;00:00, 613kB/s]
+ 99%|#########9| 41.2M/41.5M [06:25&lt;00:00, 624kB/s]
+100%|#########9| 41.3M/41.5M [06:26&lt;00:00, 512kB/s]
+100%|#########9| 41.4M/41.5M [06:26&lt;00:00, 604kB/s]
+100%|##########| 41.5M/41.5M [06:26&lt;00:00, 113kB/s]
 </pre></div>
 </div>
 </div>
@@ -582,6 +2369,7 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 OneFlow top-1 id: 281, class name: tabby, tabby cat
 </pre></div>
 </div>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 6 minutes  50.106 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-oneflow-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/f7ae979fbe61064749ce0fb7a621eb4c/from_oneflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_oneflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_paddle.html b/docs/how_to/compile_models/from_paddle.html
index ac8dfbfd0..1303b736d 100644
--- a/docs/how_to/compile_models/from_paddle.html
+++ b/docs/how_to/compile_models/from_paddle.html
@@ -464,7 +464,7 @@ A quick solution is</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>TVM prediction top-1 id: 282, class name:  282: &#39;tiger cat&#39;,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  23.283 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.115 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-paddle-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/16269b77359771348d507395692524cf/from_paddle.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_paddle.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 803221adf..ec3b03922 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -387,10 +387,8 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 10%|#         | 4.61M/44.7M [00:00&lt;00:00, 48.2MB/s]
- 42%|####2     | 18.8M/44.7M [00:00&lt;00:00, 107MB/s]
- 73%|#######3  | 32.8M/44.7M [00:00&lt;00:00, 125MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 128MB/s]
+ 53%|#####2    | 23.6M/44.7M [00:00&lt;00:00, 248MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 272MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 0d3420129..5dd595095 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -607,7 +607,6 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  2.150 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index a702d5bcf..d9a2fc933 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -300,18 +300,18 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:35.207</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>11:30.048</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <ul class="simple">
-<li><p><strong>01:23.283</strong>: <a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></li>
-<li><p><strong>01:02.150</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
-<li><p><strong>00:55.986</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
-<li><p><strong>00:31.428</strong>: <a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></li>
-<li><p><strong>00:24.838</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
-<li><p><strong>00:21.133</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
-<li><p><strong>00:20.815</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
-<li><p><strong>00:19.230</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
-<li><p><strong>00:13.473</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
-<li><p><strong>00:02.871</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
+<li><p><strong>06:50.106</strong>: <a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></li>
+<li><p><strong>01:04.115</strong>: <a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></li>
+<li><p><strong>00:59.156</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
+<li><p><strong>00:56.570</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
+<li><p><strong>00:24.524</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
+<li><p><strong>00:20.600</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
+<li><p><strong>00:20.526</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
+<li><p><strong>00:19.185</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
+<li><p><strong>00:12.821</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
+<li><p><strong>00:02.445</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 5c8dd1de6..27e3e90fa 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -622,7 +622,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.1288      16.1018      16.3555      16.0681       0.0847
+  15.8762      15.5255      16.7675      15.4631       0.4818
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index 98e175eea..d2854c1b9 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -409,60 +409,13 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
-  1%|1         | 1.94M/170M [00:00&lt;00:08, 20.0MB/s]
-  2%|2         | 3.84M/170M [00:00&lt;00:09, 17.7MB/s]
-  4%|3         | 6.73M/170M [00:00&lt;00:07, 23.1MB/s]
-  5%|5         | 9.01M/170M [00:00&lt;00:07, 23.1MB/s]
-  7%|6         | 11.9M/170M [00:00&lt;00:06, 25.4MB/s]
-  8%|8         | 14.3M/170M [00:00&lt;00:06, 25.0MB/s]
- 10%|9         | 16.9M/170M [00:00&lt;00:06, 25.6MB/s]
- 12%|#1        | 20.1M/170M [00:00&lt;00:05, 28.0MB/s]
- 13%|#3        | 22.8M/170M [00:01&lt;00:06, 22.9MB/s]
- 15%|#4        | 25.1M/170M [00:01&lt;00:07, 21.3MB/s]
- 16%|#6        | 27.5M/170M [00:01&lt;00:06, 22.0MB/s]
- 17%|#7        | 29.7M/170M [00:01&lt;00:06, 21.7MB/s]
- 20%|#9        | 33.4M/170M [00:01&lt;00:05, 26.2MB/s]
- 21%|##1       | 36.1M/170M [00:01&lt;00:05, 27.0MB/s]
- 23%|##2       | 38.8M/170M [00:01&lt;00:05, 26.5MB/s]
- 25%|##4       | 42.4M/170M [00:01&lt;00:04, 29.4MB/s]
- 28%|##7       | 47.1M/170M [00:01&lt;00:03, 35.2MB/s]
- 30%|##9       | 50.7M/170M [00:01&lt;00:03, 35.5MB/s]
- 32%|###2      | 55.1M/170M [00:02&lt;00:03, 38.8MB/s]
- 35%|###4      | 58.9M/170M [00:02&lt;00:03, 35.9MB/s]
- 37%|###6      | 62.4M/170M [00:02&lt;00:03, 31.1MB/s]
- 39%|###8      | 65.5M/170M [00:02&lt;00:03, 31.3MB/s]
- 40%|####      | 68.5M/170M [00:02&lt;00:03, 29.8MB/s]
- 42%|####2     | 72.1M/170M [00:02&lt;00:03, 31.7MB/s]
- 44%|####4     | 75.2M/170M [00:02&lt;00:03, 26.7MB/s]
- 46%|####5     | 77.9M/170M [00:03&lt;00:04, 22.9MB/s]
- 48%|####8     | 82.3M/170M [00:03&lt;00:03, 27.9MB/s]
- 51%|#####     | 86.0M/170M [00:03&lt;00:02, 30.6MB/s]
- 52%|#####2    | 89.2M/170M [00:03&lt;00:03, 28.1MB/s]
- 55%|#####4    | 92.7M/170M [00:03&lt;00:02, 29.5MB/s]
- 56%|#####6    | 95.6M/170M [00:03&lt;00:03, 25.3MB/s]
- 58%|#####8    | 98.7M/170M [00:03&lt;00:02, 26.4MB/s]
- 60%|#####9    | 102M/170M [00:03&lt;00:02, 27.7MB/s]
- 62%|######2   | 106M/170M [00:03&lt;00:02, 30.6MB/s]
- 65%|######4   | 110M/170M [00:04&lt;00:01, 32.0MB/s]
- 67%|######6   | 113M/170M [00:04&lt;00:01, 33.6MB/s]
- 69%|######8   | 117M/170M [00:04&lt;00:01, 35.2MB/s]
- 71%|#######   | 120M/170M [00:04&lt;00:01, 35.2MB/s]
- 73%|#######3  | 125M/170M [00:04&lt;00:01, 37.7MB/s]
- 76%|#######5  | 128M/170M [00:04&lt;00:01, 29.8MB/s]
- 77%|#######7  | 131M/170M [00:04&lt;00:01, 27.2MB/s]
- 80%|########  | 136M/170M [00:04&lt;00:01, 33.1MB/s]
- 83%|########2 | 140M/170M [00:05&lt;00:00, 34.5MB/s]
- 85%|########4 | 144M/170M [00:05&lt;00:00, 32.5MB/s]
- 87%|########6 | 147M/170M [00:05&lt;00:00, 27.3MB/s]
- 88%|########8 | 150M/170M [00:05&lt;00:00, 25.0MB/s]
- 90%|########9 | 152M/170M [00:05&lt;00:00, 25.0MB/s]
- 91%|#########1| 155M/170M [00:05&lt;00:00, 24.2MB/s]
- 93%|#########2| 158M/170M [00:05&lt;00:00, 26.2MB/s]
- 95%|#########4| 161M/170M [00:05&lt;00:00, 24.7MB/s]
- 96%|#########5| 163M/170M [00:06&lt;00:00, 24.3MB/s]
- 97%|#########7| 165M/170M [00:06&lt;00:00, 24.0MB/s]
- 99%|#########8| 168M/170M [00:06&lt;00:00, 22.8MB/s]
-100%|##########| 170M/170M [00:06&lt;00:00, 27.9MB/s]
+ 11%|#1        | 19.3M/170M [00:00&lt;00:00, 203MB/s]
+ 27%|##6       | 45.8M/170M [00:00&lt;00:00, 246MB/s]
+ 42%|####2     | 72.1M/170M [00:00&lt;00:00, 260MB/s]
+ 58%|#####7    | 98.3M/170M [00:00&lt;00:00, 266MB/s]
+ 73%|#######3  | 124M/170M [00:00&lt;00:00, 269MB/s]
+ 88%|########8 | 150M/170M [00:00&lt;00:00, 269MB/s]
+100%|##########| 170M/170M [00:00&lt;00:00, 264MB/s]
 /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=&#39;floor&#39;).
@@ -555,7 +508,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  10.453 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  56.175 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index 49afbc178..9e75e1d6a 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -450,12 +450,7 @@ training. Other models require a full post training calibration.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
- 10%|#         | 1.38M/13.6M [00:00&lt;00:00, 14.5MB/s]
- 33%|###3      | 4.54M/13.6M [00:00&lt;00:00, 25.4MB/s]
- 51%|#####1    | 6.97M/13.6M [00:00&lt;00:00, 24.1MB/s]
- 73%|#######3  | 9.90M/13.6M [00:00&lt;00:00, 25.3MB/s]
- 91%|######### | 12.3M/13.6M [00:00&lt;00:00, 24.2MB/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 23.7MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 167MB/s]
 </pre></div>
 </div>
 </div>
@@ -544,7 +539,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.2412      90.2071      90.8011      90.0909       0.1241
+  90.1441      90.0327      91.7263      89.8729       0.2893
 </pre></div>
 </div>
 <div class="admonition note">
@@ -583,7 +578,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  5.816 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  3.230 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index bf826ceac..60e710a4d 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -540,7 +540,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  117.0151     116.9941     119.1223     115.3432      0.9252
+  117.5831     117.4965     119.9891     116.6112      0.6789
 </pre></div>
 </div>
 <div class="admonition note">
@@ -568,7 +568,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  58.607 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  55.881 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 7887de8ac..9377f3719 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -480,7 +480,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  21.769 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.308 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 3a6069fe6..bb1d68eb5 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -415,22 +415,22 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  5%|5         | 6999/132723 [00:00&lt;00:01, 69969.38KB/s]
- 12%|#1        | 15727/132723 [00:00&lt;00:01, 80147.99KB/s]
- 19%|#8        | 24560/132723 [00:00&lt;00:01, 83881.04KB/s]
- 25%|##4       | 32949/132723 [00:00&lt;00:01, 80221.28KB/s]
- 31%|###1      | 41794/132723 [00:00&lt;00:01, 83102.74KB/s]
- 38%|###8      | 50653/132723 [00:00&lt;00:00, 84931.94KB/s]
- 45%|####4     | 59479/132723 [00:00&lt;00:00, 86003.43KB/s]
- 52%|#####1    | 68362/132723 [00:00&lt;00:00, 86889.43KB/s]
- 58%|#####8    | 77304/132723 [00:00&lt;00:00, 87674.86KB/s]
- 65%|######4   | 86187/132723 [00:01&lt;00:00, 88027.99KB/s]
- 72%|#######1  | 95042/132723 [00:01&lt;00:00, 88183.97KB/s]
- 78%|#######8  | 103982/132723 [00:01&lt;00:00, 88551.68KB/s]
- 85%|########5 | 112918/132723 [00:01&lt;00:00, 88793.65KB/s]
- 92%|#########1| 121800/132723 [00:01&lt;00:00, 88484.18KB/s]
- 98%|#########8| 130681/132723 [00:01&lt;00:00, 88579.50KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 86307.82KB/s]
+  5%|5         | 6757/132723 [00:00&lt;00:01, 67559.27KB/s]
+ 12%|#1        | 15560/132723 [00:00&lt;00:01, 79595.33KB/s]
+ 18%|#8        | 24463/132723 [00:00&lt;00:01, 83901.77KB/s]
+ 25%|##5       | 33310/132723 [00:00&lt;00:01, 85702.78KB/s]
+ 32%|###1      | 42202/132723 [00:00&lt;00:01, 86861.86KB/s]
+ 38%|###8      | 51061/132723 [00:00&lt;00:00, 87447.02KB/s]
+ 45%|####5     | 59862/132723 [00:00&lt;00:00, 87630.07KB/s]
+ 52%|#####1    | 68626/132723 [00:00&lt;00:00, 87538.91KB/s]
+ 58%|#####8    | 77439/132723 [00:00&lt;00:00, 87717.76KB/s]
+ 65%|######4   | 86267/132723 [00:01&lt;00:00, 87889.71KB/s]
+ 72%|#######1  | 95173/132723 [00:01&lt;00:00, 88246.73KB/s]
+ 78%|#######8  | 104014/132723 [00:01&lt;00:00, 88287.51KB/s]
+ 85%|########5 | 112862/132723 [00:01&lt;00:00, 88340.22KB/s]
+ 92%|#########1| 121697/132723 [00:01&lt;00:00, 88298.19KB/s]
+ 98%|#########8| 130598/132723 [00:01&lt;00:00, 88509.55KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 86935.39KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -470,7 +470,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 </pre></div>
 </div>
 <img alt="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" />
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  17.852 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  17.619 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 6c6daf89e..21e75e40f 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -300,16 +300,16 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>11:45.672</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>10:09.203</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <ul class="simple">
-<li><p><strong>03:10.453</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
-<li><p><strong>02:21.769</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
-<li><p><strong>02:17.852</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
-<li><p><strong>01:58.607</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
-<li><p><strong>01:05.816</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
-<li><p><strong>00:28.562</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
-<li><p><strong>00:22.433</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
-<li><p><strong>00:00.181</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
+<li><p><strong>02:56.175</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
+<li><p><strong>02:17.619</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
+<li><p><strong>01:55.881</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
+<li><p><strong>01:07.308</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
+<li><p><strong>01:03.230</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
+<li><p><strong>00:27.226</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
+<li><p><strong>00:21.587</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
+<li><p><strong>00:00.176</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 8cc08a226..9fa9de663 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -588,7 +588,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip094144d5-fdd2-4b2b-9231-75f8c8ece3c7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip7d4ccf53-5f97-4de0-9c7a-502154829388 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
@@ -650,7 +650,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Check failed: (lower) is false: Intrinsic lowering function for target llvm, intrinsic name tir.sqrt, type 150 not found
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Check failed: (lower) is false: FloatImm lowering function for target llvm type 150 not found
 </pre></div>
 </div>
 <p>When we attempt to run the model, we get a familiar error telling us that more functions need to be registerd for myfloat.</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index efcaea7a2..55aa1da7c 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -300,12 +300,12 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:37.308</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:36.948</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:33.934</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
-<li><p><strong>00:02.178</strong>: <a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></li>
-<li><p><strong>00:01.008</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
-<li><p><strong>00:00.187</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
+<li><p><strong>00:33.587</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
+<li><p><strong>00:02.171</strong>: <a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></li>
+<li><p><strong>00:01.009</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
+<li><p><strong>00:00.181</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index c0945d6ae..6d1d3a07e 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -486,10 +486,10 @@ profile the execution time of each passes.</p>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 5844us [5844us] (45.16%; 45.16%)
-FoldScaleAxis: 7098us [2us] (54.84%; 54.84%)
-        FoldConstant: 7096us [1481us] (54.83%; 99.97%)
-                InferType: 5615us [5615us] (43.39%; 79.13%)
+InferType: 6108us [6108us] (45.64%; 45.64%)
+FoldScaleAxis: 7276us [2us] (54.36%; 54.36%)
+        FoldConstant: 7273us [1522us] (54.35%; 99.97%)
+                InferType: 5751us [5751us] (42.97%; 79.07%)
 </pre></div>
 </div>
 </div>
@@ -512,10 +512,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 5678us [5678us] (44.65%; 44.65%)
-FoldScaleAxis: 7040us [2us] (55.35%; 55.35%)
-        FoldConstant: 7038us [1455us] (55.34%; 99.98%)
-                InferType: 5583us [5583us] (43.90%; 79.32%)
+InferType: 5810us [5810us] (44.54%; 44.54%)
+FoldScaleAxis: 7233us [2us] (55.46%; 55.46%)
+        FoldConstant: 7232us [1511us] (55.44%; 99.98%)
+                InferType: 5721us [5721us] (43.86%; 79.11%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index 3ef397bab..31ea9939b 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -534,7 +534,7 @@ latency of convolution.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 35.339016 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 40.653210 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 7b88b6a14..b76083088 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -878,7 +878,7 @@ be able to run on our build server</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 8.956960 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 10.205289 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index b569b3fa5..5a584683e 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -431,8 +431,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018245
-Baseline: 3.306084
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.017612
+Baseline: 3.390872
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -494,7 +494,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.289051
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.307004
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -563,7 +563,7 @@ vastly.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.329916
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.338097
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -626,7 +626,7 @@ the access pattern for A matrix is more cache friendly.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.117861
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.112429
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -711,7 +711,7 @@ flattening.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110710
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110156
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -799,7 +799,7 @@ write to C when all the block results are ready.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111893
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111282
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -891,7 +891,7 @@ write to C when all the block results are ready.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.146405
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.144558
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index 6f84b9684..dd0b72136 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -300,11 +300,11 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:34.502</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:34.835</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:31.909</strong>: <a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></li>
-<li><p><strong>00:01.424</strong>: <a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></li>
-<li><p><strong>00:01.169</strong>: <a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></li>
+<li><p><strong>00:32.206</strong>: <a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></li>
+<li><p><strong>00:01.432</strong>: <a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></li>
+<li><p><strong>00:01.198</strong>: <a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index 1fc5c3894..0b282aed0 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -300,14 +300,14 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:00.449</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>04:50.844</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <ul class="simple">
-<li><p><strong>02:28.084</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
-<li><p><strong>01:18.796</strong>: <a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></li>
-<li><p><strong>00:39.963</strong>: <a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></li>
-<li><p><strong>00:17.032</strong>: <a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></li>
-<li><p><strong>00:08.373</strong>: <a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></li>
-<li><p><strong>00:08.201</strong>: <a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></li>
+<li><p><strong>02:19.891</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
+<li><p><strong>01:17.690</strong>: <a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></li>
+<li><p><strong>00:39.503</strong>: <a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></li>
+<li><p><strong>00:16.807</strong>: <a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></li>
+<li><p><strong>00:08.768</strong>: <a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></li>
+<li><p><strong>00:08.185</strong>: <a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index 3a12ddb02..e5d56c3f0 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -471,11 +471,11 @@ cooperative fetching, unrolling and operator fusion.</p>
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
   preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
   attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 32;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [16]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [16], [], scope=&quot;local&quot;, align=64)[0] = 0f32
+  allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [1568]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [512]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
     conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[2] = 0f32
     conv2d_nchw_1[3] = 0f32
@@ -489,943 +489,74 @@ cooperative fetching, unrolling and operator fusion.</p>
     conv2d_nchw_1[11] = 0f32
     conv2d_nchw_1[12] = 0f32
     conv2d_nchw_1[13] = 0f32
-    conv2d_nchw_1[14] = 0f32
-    conv2d_nchw_1[15] = 0f32
     for (rc.outer.outer: int32, 0, 16) {
-      for (rx.outer.outer: int32, 0, 3) {
-        let cse_var_2: int32 = (rc.outer.outer*1568)
-        let cse_var_1: int32 = (rc.outer.outer*288)
-         {
-          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope=&quot;shared&quot;)[threadIdx.x_1] = @tir.if_then_else((((7 &lt;= threadIdx.x_1) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 49)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 7), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(thre [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 14), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(thr [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 147)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 21), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 28), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 245)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 35), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 42), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 343)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 49), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) &lt; 8) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 56), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 441)] = @tir.if_then_else((((7 &lt;= threadIdx.x_1) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 335)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 70), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 539)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 77), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 84), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 637)] = @tir.if_then_else(((1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 91), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 98), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(th [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 735)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 105), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(t [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 112), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(t [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 833)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) &lt; 8) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 119), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else((((7 &lt;= threadIdx.x_1) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 678)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 931)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 133), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(t [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 140), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(t [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1029)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 147), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 154), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1127)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 161), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 168), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1225)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 175), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) &lt; 8) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 182), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1323)] = @tir.if_then_else((((7 &lt;= threadIdx.x_1) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1021)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 196), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1421)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 203), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 210), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1519)] = @tir.if_then_else(((1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 217), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 224), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1617)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 231), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 238), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1715)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) &lt; 8) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 245), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else((((7 &lt;= threadIdx.x_1) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1364)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1813)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 259), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 266), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1911)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 273), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod( [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 280), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          if @tir.likely((threadIdx.x_1 &lt; 7), dtype=bool) {
-            pad_temp.shared_1[(threadIdx.x_1 + 2009)] = 0f32
-          }
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 49)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 49), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 49), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 98)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 98), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 2), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 147)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 147), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 51), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 196)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 196), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 4), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 245)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 245), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 53), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 294)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 294), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 6), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 343)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 343), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 55), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 392), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 8), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 441)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 441), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 57), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 490)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 490), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 10), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 539)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 539), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 59), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 588)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 588), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 12), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 637)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 637), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 61), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 686)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 686), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 14), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 735)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 735), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 63), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 16), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 833)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 833), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 65), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 882)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 882), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 18), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 931)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 931), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 67), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 980)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 980), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 20), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1029)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1029), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 69), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1078)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1078), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 22), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1127)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1127), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 71), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1176), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 24), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1225)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1225), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 73), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1274)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1274), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 26), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1323)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1323), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 75), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1372)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1372), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 28), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1421)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1421), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 77), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          kernel.shared_1[(threadIdx.x_2 + 1470)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1470), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 30), 96)*3)) + rx.outer.outer)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
-          if @tir.likely((threadIdx.x_2 &lt; 17), dtype=bool) {
-            kernel.shared_1[(threadIdx.x_2 + 1519)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1519), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 79), 96)*3)) + rx.outer.outer)]
-          }
-          for (rc.outer.inner: int32, 0, 2) {
-            let cse_var_3: int32 = (rc.outer.inner*48)
-             {
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[cse_var_3]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 96)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 192)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 288)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 97)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 193)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 289)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 2)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 98)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 194)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 290)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 3)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 99)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 195)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 291)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 4)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 100)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 196)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 292)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 5)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 101)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 197)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 293)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 6)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 102)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 198)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 294)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 7)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 103)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 199)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 295)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 8)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 104)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 200)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 296)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 9)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 105)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 201)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 297)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 10)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 106)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 202)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 298)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 11)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 107)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 203)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 299)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 12)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 108)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 204)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 300)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 13)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 109)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 205)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 301)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 14)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 110)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 206)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 302)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 15)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 111)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 207)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 303)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 16)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 112)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 208)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 304)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 17)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 113)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 209)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 305)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 18)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 114)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 210)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 306)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 19)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 115)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 211)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 307)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 20)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 116)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 212)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 308)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 21)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 117)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 213)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 309)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 22)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 118)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 214)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 310)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 23)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 119)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 215)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 311)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 24)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 120)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 216)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 312)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 25)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 121)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 217)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 313)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 26)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 122)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 218)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 314)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 27)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 123)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 219)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 315)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 28)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 124)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 220)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 316)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 29)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 125)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 221)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 317)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 30)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 126)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 222)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 318)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 31)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 127)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 223)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 319)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 32)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 128)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 224)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 320)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 33)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 129)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 225)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 321)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 34)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 130)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 226)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 322)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 35)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 131)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 227)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 323)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 36)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 132)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 228)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 324)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 37)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 133)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 229)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 325)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 38)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 134)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 230)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 326)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 39)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 135)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 231)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 327)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 40)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 136)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 232)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 328)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 41)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 137)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 233)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 329)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 42)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 138)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 234)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 330)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 43)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 139)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 235)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 331)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 44)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 140)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 236)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 332)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 45)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 141)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 237)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 333)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 46)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 142)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 238)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 334)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 47)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 143)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 239)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 335)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 384)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 480)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 576)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 672)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 385)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 481)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 577)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 673)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 386)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 482)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 578)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 674)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 387)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 483)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 579)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 675)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 388)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 484)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 580)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 676)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 389)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 485)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 581)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 677)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 390)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 486)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 582)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 678)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 391)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 487)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 583)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 679)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 392)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 488)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 584)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 680)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 393)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 489)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 585)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 681)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 394)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 490)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 586)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 682)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 395)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 491)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 587)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 683)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 396)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 492)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 588)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 684)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 397)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 493)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 589)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 685)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 398)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 494)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 590)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 686)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 399)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 495)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 591)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 687)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 400)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 496)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 592)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 688)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 401)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 497)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 593)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 689)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 402)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 498)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 594)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 690)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 403)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 499)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 595)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 691)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 404)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 500)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 596)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 692)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 405)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 501)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 597)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 693)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 406)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 502)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 598)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 694)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 407)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 503)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 599)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 695)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 408)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 504)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 600)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 696)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 409)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 505)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 601)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 697)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 410)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 506)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 602)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 698)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 411)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 507)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 603)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 699)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 412)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 508)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 604)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 700)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 413)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 509)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 605)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 701)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 414)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 510)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 606)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 702)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 415)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 511)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 607)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 703)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 416)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 512)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 608)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 704)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 417)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 513)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 609)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 705)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 418)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 514)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 610)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 706)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 419)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 515)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 611)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 707)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 420)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 516)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 612)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 708)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 421)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 517)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 613)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 709)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 422)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 518)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 614)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 710)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 423)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 519)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 615)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 711)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 424)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 520)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 616)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 712)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 425)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 521)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 617)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 713)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 426)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 522)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 618)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 714)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 427)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 523)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 619)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 715)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 428)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 524)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 620)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 716)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 429)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 525)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 621)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 717)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 430)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 526)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 622)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 718)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 431)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 527)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 623)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 719)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 768)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 864)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 960)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1056)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 769)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 865)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 961)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1057)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 770)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 866)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 962)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1058)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 771)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 867)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 963)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1059)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 772)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 868)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 964)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1060)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 773)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 869)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 965)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1061)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 774)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 870)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 966)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1062)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 775)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 871)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 967)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1063)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 776)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 872)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 968)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1064)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 777)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 873)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 969)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1065)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 778)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 874)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 970)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1066)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 779)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 875)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 971)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1067)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 780)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 876)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 972)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1068)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 781)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 877)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 973)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1069)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 782)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 878)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 974)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1070)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 783)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 879)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 975)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1071)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 784)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 880)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 976)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1072)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 785)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 881)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 977)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1073)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 786)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 882)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 978)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1074)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 787)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 883)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 979)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1075)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 788)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 884)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 980)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1076)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 789)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 885)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 981)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1077)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 790)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 886)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 982)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1078)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 791)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 887)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 983)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1079)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 792)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 888)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 984)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1080)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 793)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 889)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 985)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1081)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 794)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 890)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 986)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1082)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 795)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 891)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 987)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1083)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 796)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 892)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 988)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1084)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 797)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 893)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 989)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1085)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 798)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 894)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 990)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1086)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 799)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 895)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 991)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1087)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 800)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 896)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 992)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1088)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 801)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 897)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 993)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1089)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 802)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 898)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 994)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1090)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 803)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 899)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 995)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1091)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 804)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 900)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 996)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1092)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 805)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 901)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 997)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1093)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 806)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 902)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 998)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1094)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 807)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 903)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 999)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1095)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 808)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 904)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1000)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1096)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 809)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 905)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1001)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1097)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 810)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 906)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1002)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1098)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 811)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 907)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1003)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1099)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 812)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 908)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1004)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1100)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 813)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 909)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1005)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1101)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 814)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 910)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1006)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1102)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 815)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 911)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1007)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1103)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1152)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1248)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1344)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1440)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1153)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1249)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1345)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1441)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1154)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1250)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1346)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1442)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1155)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1251)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1347)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1443)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1156)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1252)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1348)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1444)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1157)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1253)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1349)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1445)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1158)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1254)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1350)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1446)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1159)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1255)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1351)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1447)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1160)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1256)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1352)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1448)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1161)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1257)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1353)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1449)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1162)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1258)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1354)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1450)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1163)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1259)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1355)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1451)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1164)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1260)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1356)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1452)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1165)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1261)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1357)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1453)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1166)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1262)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1358)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1454)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1167)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1263)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1359)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1455)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1168)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1264)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1360)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1456)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1169)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1265)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1361)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1457)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1170)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1266)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1362)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1458)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1171)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1267)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1363)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1459)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1172)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1268)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1364)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1460)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1173)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1269)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1365)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1461)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1174)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1270)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1366)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1462)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1175)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1271)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1367)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1463)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1176)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1272)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1368)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1464)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1177)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1273)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1369)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1465)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1178)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1274)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1370)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1466)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1179)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1275)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1371)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1467)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1180)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1276)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1372)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1468)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1181)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1277)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1373)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1469)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1182)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1278)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1374)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1470)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1183)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1279)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1375)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1471)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1184)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1280)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1376)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1472)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1185)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1281)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1377)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1473)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1186)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1282)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1378)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1474)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1187)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1283)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1379)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1475)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1188)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1284)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1380)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1476)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1189)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1285)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1381)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1477)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1190)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1286)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1382)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1478)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1191)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1287)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1383)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1479)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1192)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1288)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1384)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1480)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1193)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1289)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1385)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1481)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1194)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1290)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1386)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1482)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1195)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1291)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1387)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1483)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1196)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1292)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1388)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1484)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1197)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1293)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1389)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1485)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1198)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1294)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1390)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1486)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1199)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1295)]))
-              conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1391)]))
-              conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1487)]))
+      for (ry.outer.outer: int32, 0, 3) {
+        for (rx.outer.outer: int32, 0, 3) {
+          let cse_var_2: int32 = (rc.outer.outer*288)
+          let cse_var_1: int32 = (ry.outer.outer*3)
+           {
+            for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 28) {
+              let cse_var_3: int32 = (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56)
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [1568], [], scope=&quot;shared&quot;)[(cse_var_3 + threadIdx.x_1)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7))) &amp;&amp; ((ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7)) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp;  [...]
+            }
+            attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1: Buffer(kernel.shared, float32, [512], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 32)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 7), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 14), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 21), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 32256)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 35), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 42), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 49), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 64512)]
+            attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+            if @tir.likely((threadIdx.x_2 &lt; 8), dtype=bool) {
+              kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((((blockIdx.x*73728) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer) + 69120)]
+            }
+            for (rc.outer.inner: int32, 0, 16) {
+              for (ff.outer.inner: int32, 0, 2) {
+                let cse_var_10: int32 = (ff.outer.inner*7)
+                let cse_var_9: int32 = (cse_var_10 + 6)
+                let cse_var_8: int32 = (cse_var_10 + 5)
+                let cse_var_7: int32 = (cse_var_10 + 4)
+                let cse_var_6: int32 = (cse_var_10 + 3)
+                let cse_var_5: int32 = (cse_var_10 + 2)
+                let cse_var_4: int32 = (cse_var_10 + 1)
+                 {
+                  conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+                  conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 49)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                  conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 50)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                  conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 51)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                  conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 52)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                  conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 53)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                  conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 54)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                  conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 55)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+                }
+              }
             }
           }
         }
       }
     }
-    for (i1.inner: int32, 0, 16) {
-      compute[(((blockIdx.x*784) + (i1.inner*49)) + threadIdx.x)] = max((conv2d_nchw_1[i1.inner] + bias[((blockIdx.x*16) + i1.inner)]), 0f32)
+    for (i1.inner: int32, 0, 2) {
+      for (i3.inner: int32, 0, 7) {
+        compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+      }
     }
   }
 }
@@ -1463,7 +594,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.229 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.330 ms
 </pre></div>
 </div>
 </div>
@@ -1493,21 +624,21 @@ conv2d_nchw_nn_o_i, conv2d_nchw_nn_i = s[conv2d_nchw].split(conv2d_nchw_nn, fact
 conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=4)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=1)
+conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
 conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
 conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
-conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
 conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=16)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
-conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
+conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
 conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
 conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
 conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
@@ -1515,14 +646,14 @@ s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nc
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=16)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=1)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
 compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
 compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
 compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1542,14 +673,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 1024)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 16)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -1567,10 +698,10 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(49) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[16];
-  __shared__ float pad_temp_shared[2016];
-  __shared__ float kernel_shared[1536];
+extern &quot;C&quot; __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[14];
+  __shared__ float pad_temp_shared[1568];
+  __shared__ float kernel_shared[512];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
@@ -1585,864 +716,51 @@ extern &quot;C&quot; __global__ void __launch_bounds__(49) default_function_kern
   conv2d_nchw[11] = 0.000000e+00f;
   conv2d_nchw[12] = 0.000000e+00f;
   conv2d_nchw[13] = 0.000000e+00f;
-  conv2d_nchw[14] = 0.000000e+00f;
-  conv2d_nchw[15] = 0.000000e+00f;
   for (int rc_outer_outer = 0; rc_outer_outer &lt; 16; ++rc_outer_outer) {
-    for (int rx_outer_outer = 0; rx_outer_outer &lt; 3; ++rx_outer_outer) {
-      __syncthreads();
-      pad_temp_shared[((int)threadIdx.x)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 49)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 49) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 98) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 147)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 147) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 196) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 245)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 245) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 294) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 343)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 343) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 392)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 392) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 441)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 335)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 490) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 539)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 539) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 588) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 637)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 637) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 686) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 735)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 735) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 784) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 833)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 833) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 882)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 678)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 931)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 931) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 980) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1029)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1029) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1078) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1127)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1127) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1176) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1225)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1225) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1274)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1274) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1323)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1021)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1372) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1421)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1421) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1470) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1519)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1519) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1568) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1617)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1617) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1666) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1715)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1715) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1764)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1364)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1813)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1813) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1862) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1911)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1911) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1960) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
-      if (((int)threadIdx.x) &lt; 7) {
-        pad_temp_shared[(((int)threadIdx.x) + 2009)] = 0.000000e+00f;
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 49)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 49) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 49) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 98)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 98) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 2) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 147)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 147) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 51) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 196)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 196) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 4) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 245)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 245) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 53) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 294)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 294) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 6) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 343)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 343) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 55) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 392)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 8) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 441)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 441) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 57) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 490)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 490) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 10) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 539)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 539) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 59) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 588)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 588) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 12) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 637)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 637) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 61) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 686)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 686) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 14) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 735)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 735) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 63) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 784)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 16) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 833)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 833) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 65) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 882)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 882) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 18) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 931)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 931) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 67) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 980)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 980) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 20) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1029)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1029) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 69) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1078)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1078) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 22) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1127)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1127) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 71) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1176) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 24) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1225)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1225) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 73) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1274)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1274) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 26) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1323)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1323) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 75) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1372)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1372) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 28) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1421)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1421) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 77) % 96) * 3)) + rx_outer_outer)];
-      kernel_shared[(((int)threadIdx.x) + 1470)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1470) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 30) * 3)) + rx_outer_outer)];
-      if (((int)threadIdx.x) &lt; 17) {
-        kernel_shared[(((int)threadIdx.x) + 1519)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1519) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 79) * 3)) + rx_outer_outer)];
-      }
-      __syncthreads();
-      for (int rc_outer_inner = 0; rc_outer_inner &lt; 2; ++rc_outer_inner) {
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[(rc_outer_inner * 48)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 96)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 192)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 288)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 97)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 193)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 289)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 2)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 98)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 194)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 290)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 3)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 99)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 195)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 291)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 4)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 100)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 196)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 292)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 5)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 101)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 197)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 293)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 6)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 102)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 198)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 294)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 7)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 103)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 199)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 295)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 8)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 104)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 200)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 296)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 9)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 105)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 201)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 297)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 10)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 106)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 202)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 298)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 11)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 107)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 203)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 299)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 12)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 108)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 204)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 300)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 13)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 109)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 205)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 301)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 14)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 110)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 206)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 302)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 15)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 111)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 207)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 303)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 16)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 112)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 208)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 304)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 17)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 113)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 209)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 305)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 18)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 114)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 210)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 306)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 19)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 115)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 211)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 307)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 20)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 116)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 212)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 308)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 21)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 117)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 213)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 309)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 22)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 118)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 214)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 310)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 23)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 119)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 215)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 311)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 24)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 120)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 216)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 312)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 25)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 121)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 217)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 313)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 26)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 122)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 218)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 314)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 27)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 123)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 219)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 315)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 28)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 124)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 220)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 316)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 29)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 125)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 221)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 317)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 30)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 126)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 222)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 318)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 31)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 127)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 223)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 319)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 32)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 128)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 224)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 320)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 33)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 129)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 225)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 321)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 34)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 130)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 226)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 322)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 35)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 131)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 227)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 323)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 36)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 132)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 228)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 324)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 37)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 133)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 229)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 325)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 38)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 134)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 230)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 326)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 39)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 135)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 231)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 327)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 40)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 136)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 232)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 328)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 41)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 137)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 233)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 329)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 42)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 138)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 234)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 330)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 43)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 139)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 235)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 331)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 44)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 140)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 236)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 332)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 45)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 141)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 237)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 333)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 46)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 142)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 238)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 334)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 47)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 143)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 239)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 335)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 384)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 480)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 576)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 672)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 385)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 481)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 577)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 673)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 386)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 482)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 578)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 674)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 387)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 483)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 579)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 675)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 388)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 484)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 580)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 676)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 389)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 485)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 581)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 677)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 390)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 486)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 582)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 678)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 391)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 487)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 583)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 679)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 392)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 488)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 584)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 680)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 393)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 489)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 585)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 681)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 394)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 490)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 586)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 682)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 395)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 491)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 587)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 683)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 396)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 492)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 588)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 684)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 397)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 493)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 589)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 685)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 398)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 494)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 590)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 686)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 399)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 495)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 591)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 687)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 400)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 496)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 592)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 688)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 401)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 497)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 593)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 689)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 402)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 498)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 594)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 690)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 403)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 499)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 595)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 691)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 404)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 500)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 596)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 692)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 405)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 501)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 597)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 693)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 406)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 502)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 598)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 694)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 407)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 503)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 599)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 695)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 408)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 504)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 600)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 696)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 409)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 505)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 601)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 697)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 410)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 506)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 602)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 698)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 411)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 507)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 603)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 699)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 412)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 508)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 604)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 700)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 413)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 509)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 605)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 701)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 414)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 510)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 606)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 702)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 415)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 511)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 607)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 703)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 416)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 512)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 608)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 704)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 417)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 513)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 609)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 705)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 418)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 514)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 610)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 706)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 419)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 515)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 611)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 707)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 420)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 516)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 612)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 708)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 421)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 517)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 613)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 709)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 422)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 518)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 614)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 710)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 423)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 519)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 615)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 711)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 424)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 520)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 616)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 712)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 425)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 521)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 617)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 713)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 426)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 522)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 618)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 714)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 427)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 523)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 619)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 715)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 428)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 524)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 620)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 716)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 429)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 525)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 621)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 717)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 430)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 526)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 622)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 718)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 431)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 527)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 623)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 719)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 768)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 864)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 960)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1056)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 769)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 865)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 961)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1057)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 770)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 866)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 962)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1058)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 771)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 867)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 963)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1059)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 772)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 868)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 964)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1060)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 773)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 869)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 965)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1061)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 774)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 870)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 966)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1062)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 775)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 871)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 967)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1063)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 776)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 872)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 968)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1064)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 777)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 873)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 969)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1065)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 778)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 874)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 970)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1066)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 779)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 875)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 971)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1067)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 780)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 876)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 972)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1068)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 781)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 877)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 973)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1069)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 782)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 878)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 974)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1070)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 783)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 879)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 975)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1071)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 784)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 880)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 976)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1072)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 785)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 881)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 977)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1073)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 786)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 882)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 978)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1074)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 787)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 883)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 979)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1075)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 788)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 884)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 980)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1076)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 789)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 885)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 981)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1077)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 790)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 886)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 982)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1078)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 791)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 887)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 983)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1079)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 792)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 888)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 984)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1080)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 793)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 889)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 985)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1081)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 794)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 890)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 986)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1082)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 795)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 891)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 987)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1083)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 796)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 892)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 988)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1084)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 797)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 893)]));
... 3691 lines suppressed ...