You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/05/17 10:46:19 UTC
[tvm-site] branch asf-site updated: deploying docs (apache/tvm@de21c8f2ef507587fdcc99b851404de5aeeb5a16)
This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git
The following commit(s) were added to refs/heads/asf-site by this push:
new eb0d6feae deploying docs (apache/tvm@de21c8f2ef507587fdcc99b851404de5aeeb5a16)
eb0d6feae is described below
commit eb0d6feaeae89d7cb5b07840914ba5f19f95ef73
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Tue May 17 10:46:15 2022 +0000
deploying docs (apache/tvm@de21c8f2ef507587fdcc99b851404de5aeeb5a16)
---
.../how_to/compile_models/from_mxnet.rst.txt | 2 +-
.../how_to/compile_models/from_oneflow.rst.txt | 7 +-
.../how_to/compile_models/from_paddle.rst.txt | 2 +-
.../how_to/compile_models/from_pytorch.rst.txt | 2 +-
.../how_to/compile_models/from_tensorflow.rst.txt | 5 -
.../compile_models/sg_execution_times.rst.txt | 22 +-
.../deploy_models/deploy_model_on_android.rst.txt | 2 +-
.../deploy_object_detection_pytorch.rst.txt | 4 +-
.../deploy_models/deploy_prequantized.rst.txt | 6 +-
.../deploy_prequantized_tflite.rst.txt | 4 +-
.../how_to/deploy_models/deploy_quantized.rst.txt | 2 +-
.../deploy_models/deploy_ssd_gluoncv.rst.txt | 4 +-
.../deploy_models/sg_execution_times.rst.txt | 18 +-
.../extend_tvm/bring_your_own_datatypes.rst.txt | 4 +-
.../how_to/extend_tvm/sg_execution_times.rst.txt | 10 +-
.../how_to/extend_tvm/use_pass_instrument.rst.txt | 16 +-
.../optimize_operators/opt_conv_cuda.rst.txt | 2 +-
.../optimize_operators/opt_conv_tensorcore.rst.txt | 2 +-
.../how_to/optimize_operators/opt_gemm.rst.txt | 16 +-
.../optimize_operators/sg_execution_times.rst.txt | 8 +-
.../sg_execution_times.rst.txt | 16 +-
.../tune_conv2d_layer_cuda.rst.txt | 1940 ++------------------
.../tune_network_cuda.rst.txt | 2 +-
.../tune_network_x86.rst.txt | 4 +-
.../tune_sparse_x86.rst.txt | 172 +-
.../tune_with_autotvm/sg_execution_times.rst.txt | 12 +-
.../tune_with_autotvm/tune_conv2d_cuda.rst.txt | 34 +-
.../work_with_microtvm/micro_autotune.rst.txt | 16 +-
.../work_with_microtvm/sg_execution_times.rst.txt | 12 +-
.../work_with_relay/sg_execution_times.rst.txt | 8 +-
.../work_with_schedules/sg_execution_times.rst.txt | 18 +-
.../how_to/work_with_schedules/tensorize.rst.txt | 2 +-
.../tutorials/autotvm/sg_execution_times.rst.txt | 6 +-
.../frontend/deploy_classification.rst.txt | 2 +-
.../tutorials/frontend/deploy_detection.rst.txt | 2 +-
.../tutorials/frontend/sg_execution_times.rst.txt | 6 +-
.../tutorials/optimize/sg_execution_times.rst.txt | 6 +-
.../topic/vta/tutorials/sg_execution_times.rst.txt | 6 +-
.../tutorial/auto_scheduler_matmul_x86.rst.txt | 9 +-
docs/_sources/tutorial/autotvm_relay_x86.rst.txt | 56 +-
.../tutorial/cross_compilation_and_rpc.rst.txt | 2 +-
docs/_sources/tutorial/intro_topi.rst.txt | 2 +-
docs/_sources/tutorial/sg_execution_times.rst.txt | 26 +-
.../tutorial/tensor_expr_get_started.rst.txt | 44 +-
docs/commit_hash | 2 +-
docs/how_to/compile_models/from_mxnet.html | 2 +-
docs/how_to/compile_models/from_oneflow.html | 1874 ++++++++++++++++++-
docs/how_to/compile_models/from_paddle.html | 2 +-
docs/how_to/compile_models/from_pytorch.html | 6 +-
docs/how_to/compile_models/from_tensorflow.html | 1 -
docs/how_to/compile_models/sg_execution_times.html | 22 +-
.../deploy_models/deploy_model_on_android.html | 2 +-
.../deploy_object_detection_pytorch.html | 63 +-
docs/how_to/deploy_models/deploy_prequantized.html | 11 +-
.../deploy_models/deploy_prequantized_tflite.html | 4 +-
docs/how_to/deploy_models/deploy_quantized.html | 2 +-
docs/how_to/deploy_models/deploy_ssd_gluoncv.html | 34 +-
docs/how_to/deploy_models/sg_execution_times.html | 18 +-
.../extend_tvm/bring_your_own_datatypes.html | 4 +-
docs/how_to/extend_tvm/sg_execution_times.html | 10 +-
docs/how_to/extend_tvm/use_pass_instrument.html | 16 +-
docs/how_to/optimize_operators/opt_conv_cuda.html | 2 +-
.../optimize_operators/opt_conv_tensorcore.html | 2 +-
docs/how_to/optimize_operators/opt_gemm.html | 16 +-
.../optimize_operators/sg_execution_times.html | 8 +-
.../sg_execution_times.html | 14 +-
.../tune_conv2d_layer_cuda.html | 1940 ++------------------
.../tune_with_autoscheduler/tune_network_cuda.html | 2 +-
.../tune_with_autoscheduler/tune_network_x86.html | 4 +-
.../tune_with_autoscheduler/tune_sparse_x86.html | 172 +-
.../tune_with_autotvm/sg_execution_times.html | 12 +-
.../how_to/tune_with_autotvm/tune_conv2d_cuda.html | 34 +-
docs/how_to/work_with_microtvm/micro_autotune.html | 16 +-
.../work_with_microtvm/sg_execution_times.html | 12 +-
.../how_to/work_with_relay/sg_execution_times.html | 8 +-
.../work_with_schedules/sg_execution_times.html | 18 +-
docs/how_to/work_with_schedules/tensorize.html | 2 +-
docs/reference/api/python/auto_scheduler.html | 4 +-
.../api/typedoc/classes/bytestreamreader.html | 12 +-
.../api/typedoc/classes/cachedcallstack.html | 34 +-
docs/reference/api/typedoc/classes/dldatatype.html | 12 +-
docs/reference/api/typedoc/classes/dldevice.html | 10 +-
.../reference/api/typedoc/classes/environment.html | 12 +-
docs/reference/api/typedoc/classes/ffilibrary.html | 20 +-
.../api/typedoc/classes/graphexecutor.html | 16 +-
docs/reference/api/typedoc/classes/instance.html | 40 +-
docs/reference/api/typedoc/classes/memory.html | 34 +-
docs/reference/api/typedoc/classes/module.html | 10 +-
docs/reference/api/typedoc/classes/ndarray.html | 22 +-
.../api/typedoc/classes/packedfunccell.html | 6 +-
docs/reference/api/typedoc/classes/rpcserver.html | 14 +-
docs/reference/api/typedoc/classes/scalar.html | 6 +-
.../api/typedoc/classes/webgpucontext.html | 12 +-
docs/reference/api/typedoc/enums/argtypecode.html | 30 +-
.../api/typedoc/enums/aynccallbackcode.html | 4 +-
.../api/typedoc/enums/dldatatypecode.html | 8 +-
.../api/typedoc/enums/rpcserverstate.html | 12 +-
docs/reference/api/typedoc/enums/sizeof.html | 18 +-
docs/reference/api/typedoc/index.html | 112 +-
.../api/typedoc/interfaces/disposable.html | 2 +-
.../api/typedoc/interfaces/functioninfo.html | 6 +-
.../api/typedoc/interfaces/libraryprovider.html | 4 +-
docs/searchindex.js | 2 +-
.../vta/tutorials/autotvm/sg_execution_times.html | 6 +-
.../tutorials/frontend/deploy_classification.html | 2 +-
.../vta/tutorials/frontend/deploy_detection.html | 2 +-
.../vta/tutorials/frontend/sg_execution_times.html | 6 +-
.../vta/tutorials/optimize/sg_execution_times.html | 6 +-
docs/topic/vta/tutorials/sg_execution_times.html | 6 +-
docs/tutorial/auto_scheduler_matmul_x86.html | 5 +-
docs/tutorial/autotvm_relay_x86.html | 258 +--
docs/tutorial/cross_compilation_and_rpc.html | 2 +-
docs/tutorial/intro_topi.html | 2 +-
docs/tutorial/sg_execution_times.html | 26 +-
docs/tutorial/tensor_expr_get_started.html | 44 +-
115 files changed, 2995 insertions(+), 4708 deletions(-)
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index cdbfd724e..4c51366bc 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -98,7 +98,7 @@ In this section, we download a pretrained imagenet model and classify an image.
.. code-block:: none
- Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip2e9fcbdd-04fb-48d4-9551-2fca667db007 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+ Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip241648fa-9037-4f2c-98f8-146ee42e6cc7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
x (1, 3, 224, 224)
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index c10c7c0f8..6355c0db5 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -100,7 +100,7 @@ Load a pretrained OneFlow model and save model
.. code-block:: none
Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
0%| | 0.00/41.5M [00:00<?, ?B/s]
0%| | 16.0k/41.5M [00:00<07:45, 93.5kB/s]
0%| | 48.0k/41.5M [00:00<04:53, 148kB/s]
0%| | 96.0k/41.5M [00:00<03:28, 208kB/s]
0%| | 168k/41.5M [00:00<02:28, 291kB/s]
1%| | 336k/41.5M [00:00<01:20, 538kB/s]
2%|1 | 648k/41.5M [00:01<00:44, 971kB/s]
3%|3 | 1.27M/41.5M [00:01<00:22, 1.88MB/s]
6%|6 | 2.53M/41.5M [00:01<00:11, 3.68MB/s]
10%|9 | 4.03M/41.5M [00:01<00:07, 5.32MB/s]
13%|#3 | 5.52M/41.5M [00:01<00:05, 7.14MB/s]
16%|#5 | 6.59M/41.5M [00:01<00:04, 8.01MB/s]
18%|#7 | 7.44M/41.5M [00:01<00:04, 7.54MB/s]
20%|#9 | 8.21M/41.5M [00:02<00:05, 6.01MB/s]
23%|##2 | 9.52M/41.5M [00:02<00:05, 6.58MB/s]
25%|##4 | 10.3M/41.5M [00:02<00:05, 6.42MB/s]
28%|##8 | 11.8M/41.5M [00:02<00:04, 7.23MB/s]
32%|###1 | 13.3M/41.5M [00:02<00
:03, 7.76MB/s]
35%|###4 | 14.4M/41.5M [00:03<00:04, 6.95MB/s]
38%|###8 | 15.9M/41.5M [00:03<00:03, 7.52MB/s]
40%|#### | 16.7M/41.5M [00:03<00:03, 6.65MB/s]
44%|####3 | 18.1M/41.5M [00:03<00:03, 7.24MB/s]
47%|####7 | 19.6M/41.5M [00:03<00:02, 7.71MB/s]
51%|##### | 21.1M/41.5M [00:03<00:02, 8.07MB/s]
55%|#####4 | 22.6M/41.5M [00:04<00:02, 8.32MB/s]
58%|#####8 | 24.1M/41.5M [00:04<00:01, 9.53MB/s]
62%|######1 | 25.6M/41.5M [00:04<00:01, 10.2MB/s]
64%|######4 | 26.6M/41.5M [00:04<00:02, 7.20MB/s]
67%|######6 | 27.6M/41.5M [00:04<00:02, 6.90MB/s]
68%|######8 | 28.4M/41.5M [00:04<00:02, 6.23MB/s]
71%|#######1 | 29.5M/41.5M [00:05<00:02, 6.27MB/s]
74%|#######3 | 30.6M/41.5M [00:05<00:01, 6.32MB/s]
76%|#######6 | 31.7M/41.5M [00:05<00:01, 6.39MB/s]
79%|#######9 | 32.8M/41.5M [00:05<00:01, 6.95MB/s]
82%|########1 | 33.9M/41.5M [00:05<00:01, 7.26MB/s]
84%|####
####4 | 35.0M/41.5M [00:05<00:00, 7.11MB/s]
87%|########7 | 36.2M/41.5M [00:06<00:00, 7.65MB/s]
89%|########9 | 36.9M/41.5M [00:06<00:00, 7.29MB/s]
91%|######### | 37.7M/41.5M [00:06<00:00, 6.29MB/s]
93%|#########2| 38.5M/41.5M [00:06<00:00, 5.96MB/s]
94%|#########4| 39.1M/41.5M [00:06<00:00, 5.12MB/s]
97%|#########6| 40.1M/41.5M [00:06<00:00, 5.32MB/s]
99%|#########8| 40.9M/41.5M [00:07<00:00, 5.20MB/s]
100%|##########| 41.5M/41.5M [00:07<00:00, 6.12MB/s]
+
0%| | 0.00/41.5M [00:00<?, ?B/s]
0%| | 16.0k/41.5M [00:00<15:35, 46.5kB/s]
0%| | 56.0k/41.5M [00:00<05:50, 124kB/s]
0%| | 72.0k/41.5M [00:00<06:26, 112kB/s]
0%| | 88.0k/41.5M [00:00<06:51, 105kB/s]
0%| | 104k/41.5M [00:01<07:08, 101kB/s]
0%| | 120k/41.5M [00:01<07:20, 98.5kB/s]
0%| | 136k/41.5M [00:01<07:28, 96.7kB/s]
0%| | 152k/41.5M [00:01<07:33, 95.5kB/s]
0%| | 168k/41.5M [00:01<07:37, 94.6kB/s]
0%| | 184k/41.5M [00:01<07:40, 94.1kB/s]
0%| | 200k/41.5M [00:02<10:02, 71.9kB/s]
1%| | 216k/41.5M [00:02<11:41, 61.7kB/s]
1%| | 240k/41.5M [00:02<09:08, 78.9kB/s]
1%| | 256k/41.5M [00:03<15:05, 47.7kB/s]
1%| | 264k/41.5M [00:03<15:09, 47.5kB/s]
1%| | 272k/41.5M [00:03<15:14, 47.3kB/s]
1%| | 280k/41.5M [00:04<15:17, 47.1
kB/s]
1%| | 288k/41.5M [00:04<15:20, 46.9kB/s]
1%| | 296k/41.5M [00:04<15:23, 46.8kB/s]
1%| | 304k/41.5M [00:04<15:25, 46.7kB/s]
1%| | 312k/41.5M [00:04<15:26, 46.6kB/s]
1%| | 320k/41.5M [00:04<16:28, 43.7kB/s]
1%| | 328k/41.5M [00:05<19:36, 36.7kB/s]
1%| | 336k/41.5M [00:05<18:24, 39.1kB/s]
1%| | 352k/41.5M [00:05<14:23, 50.0kB/s]
1%| | 360k/41.5M [00:06<21:21, 33.7kB/s]
1%| | 376k/41.5M [00:06<20:26, 35.2kB/s]
1%| | 384k/41.5M [00:06<19:18, 37.2kB/s]
1%| | 392k/41.5M [00:07<21:16, 33.8kB/s]
1%| | 400k/41.5M [00:07<23:31, 30.5kB/s]
1%| | 408k/41.5M [00:07<21:37, 33.2kB/s]
1%| | 416k/41.5M [00:07<23:56, 30.0kB/s]
1%| | 424k/41.5M [00:08<21:31, 33.3kB/s]
1%|1 | 432k/41.5M [00:08<19:46, 36.3kB/s]
1%|1 | 440k/41.5M [00:08<
23:02, 31.1kB/s]
1%|1 | 456k/41.5M [00:08<18:05, 39.6kB/s]
1%|1 | 464k/41.5M [00:09<19:03, 37.6kB/s]
1%|1 | 472k/41.5M [00:09<18:07, 39.6kB/s]
1%|1 | 480k/41.5M [00:09<17:23, 41.2kB/s]
1%|1 | 488k/41.5M [00:09<16:51, 42.5kB/s]
1%|1 | 496k/41.5M [00:09<16:27, 43.6kB/s]
1%|1 | 504k/41.5M [00:10<14:15, 50.3kB/s]
1%|1 | 512k/41.5M [00:10<14:36, 49.1kB/s]
1%|1 | 528k/41.5M [00:10<12:57, 55.2kB/s]
1%|1 | 536k/41.5M [00:10<13:34, 52.8kB/s]
1%|1 | 544k/41.5M [00:10<13:18, 53.8kB/s]
1%|1 | 560k/41.5M [00:10<11:31, 62.0kB/s]
1%|1 | 568k/41.5M [00:11<11:46, 60.8kB/s]
1%|1 | 584k/41.5M [00:11<10:42, 66.7kB/s]
1%|1 | 592k/41.5M [00:11<11:07, 64.3kB/s]
1%|1 | 608k/41.5M [00:11<09:47, 73.0kB/s]
1%|1 | 624k/41.5M [00:11<09:02, 79.0kB/s]
2%|1 | 640k/41
.5M [00:12<08:35, 83.2kB/s]
2%|1 | 656k/41.5M [00:12<08:17, 86.1kB/s]
2%|1 | 672k/41.5M [00:12<08:06, 88.1kB/s]
2%|1 | 688k/41.5M [00:12<07:58, 89.5kB/s]
2%|1 | 704k/41.5M [00:12<10:15, 69.6kB/s]
2%|1 | 736k/41.5M [00:13<07:35, 93.9kB/s]
2%|1 | 752k/41.5M [00:13<07:36, 93.5kB/s]
2%|1 | 768k/41.5M [00:13<07:37, 93.3kB/s]
2%|1 | 784k/41.5M [00:13<07:38, 93.2kB/s]
2%|1 | 800k/41.5M [00:13<07:38, 93.1kB/s]
2%|1 | 816k/41.5M [00:13<07:38, 93.0kB/s]
2%|1 | 832k/41.5M [00:14<07:13, 98.4kB/s]
2%|1 | 848k/41.5M [00:14<09:35, 74.1kB/s]
2%|2 | 880k/41.5M [00:14<07:17, 97.3kB/s]
2%|2 | 896k/41.5M [00:14<07:23, 95.9kB/s]
2%|2 | 912k/41.5M [00:15<07:27, 95.1kB/s]
2%|2 | 928k/41.5M [00:15<07:29, 94.7kB/s]
2%|2 | 944k/41.5M [00:15<07:31, 94.1kB/s]
2%|2
| 960k/41.5M [00:15<11:56, 59.4kB/s]
2%|2 | 0.98M/41.5M [00:16<07:27, 95.0kB/s]
2%|2 | 0.99M/41.5M [00:16<07:29, 94.5kB/s]
2%|2 | 1.01M/41.5M [00:16<09:22, 75.5kB/s]
2%|2 | 1.03M/41.5M [00:16<07:53, 89.5kB/s]
3%|2 | 1.05M/41.5M [00:16<07:49, 90.3kB/s]
3%|2 | 1.06M/41.5M [00:17<07:46, 90.9kB/s]
3%|2 | 1.08M/41.5M [00:17<07:43, 91.4kB/s]
3%|2 | 1.09M/41.5M [00:17<09:48, 71.9kB/s]
3%|2 | 1.11M/41.5M [00:17<09:41, 72.8kB/s]
3%|2 | 1.12M/41.5M [00:18<10:01, 70.4kB/s]
3%|2 | 1.13M/41.5M [00:18<09:13, 76.4kB/s]
3%|2 | 1.14M/41.5M [00:18<10:21, 68.1kB/s]
3%|2 | 1.16M/41.5M [00:18<09:23, 75.1kB/s]
3%|2 | 1.17M/41.5M [00:18<08:47, 80.2kB/s]
3%|2 | 1.18M/41.5M [00:18<10:01, 70.3kB/s]
3%|2 | 1.20M/41.5M [00:19<11:50, 59.4kB/s]
3%|2 | 1.20M/41.5M [00:19<12:31,
56.2kB/s]
3%|2 | 1.23M/41.5M [00:19<09:03, 77.7kB/s]
3%|2 | 1.24M/41.5M [00:19<09:10, 76.7kB/s]
3%|3 | 1.25M/41.5M [00:19<09:34, 73.4kB/s]
3%|3 | 1.27M/41.5M [00:20<09:30, 73.9kB/s]
3%|3 | 1.27M/41.5M [00:20<09:54, 71.0kB/s]
3%|3 | 1.29M/41.5M [00:20<09:04, 77.5kB/s]
3%|3 | 1.30M/41.5M [00:20<08:33, 82.1kB/s]
3%|3 | 1.31M/41.5M [00:20<10:33, 66.5kB/s]
3%|3 | 1.33M/41.5M [00:21<09:29, 74.0kB/s]
3%|3 | 1.34M/41.5M [00:21<09:27, 74.2kB/s]
3%|3 | 1.35M/41.5M [00:21<09:50, 71.3kB/s]
3%|3 | 1.36M/41.5M [00:21<10:59, 63.8kB/s]
3%|3 | 1.38M/41.5M [00:21<09:40, 72.5kB/s]
3%|3 | 1.39M/41.5M [00:21<08:55, 78.6kB/s]
3%|3 | 1.41M/41.5M [00:22<08:27, 82.8kB/s]
3%|3 | 1.42M/41.5M [00:22<08:09, 85.8kB/s]
3%|3 | 1.44M/41.5M [00:22<07:57, 87.9kB/s]
4%|3
| 1.45M/41.5M [00:22<07:49, 89.3kB/s]
4%|3 | 1.47M/41.5M [00:23<10:03, 69.5kB/s]
4%|3 | 1.48M/41.5M [00:23<10:58, 63.7kB/s]
4%|3 | 1.48M/41.5M [00:23<14:54, 46.9kB/s]
4%|3 | 1.51M/41.5M [00:23<12:37, 55.3kB/s]
4%|3 | 1.52M/41.5M [00:24<13:04, 53.5kB/s]
4%|3 | 1.52M/41.5M [00:24<13:28, 51.8kB/s]
4%|3 | 1.53M/41.5M [00:24<13:50, 50.5kB/s]
4%|3 | 1.54M/41.5M [00:24<14:07, 49.4kB/s]
4%|3 | 1.55M/41.5M [00:24<12:09, 57.4kB/s]
4%|3 | 1.56M/41.5M [00:25<18:49, 37.1kB/s]
4%|3 | 1.59M/41.5M [00:25<13:12, 52.8kB/s]
4%|3 | 1.59M/41.5M [00:25<12:51, 54.2kB/s]
4%|3 | 1.60M/41.5M [00:25<13:19, 52.3kB/s]
4%|3 | 1.61M/41.5M [00:26<17:13, 40.5kB/s]
4%|3 | 1.62M/41.5M [00:26<17:04, 40.8kB/s]
4%|3 | 1.64M/41.5M [00:27<16:19, 42.7kB/s]
4%|3 | 1.65M/41.5M [00:27<16:02
, 43.4kB/s]
4%|3 | 1.66M/41.5M [00:27<15:48, 44.0kB/s]
4%|4 | 1.66M/41.5M [00:27<15:36, 44.6kB/s]
4%|4 | 1.67M/41.5M [00:27<15:27, 45.0kB/s]
4%|4 | 1.68M/41.5M [00:27<15:19, 45.4kB/s]
4%|4 | 1.69M/41.5M [00:28<19:25, 35.8kB/s]
4%|4 | 1.70M/41.5M [00:28<18:43, 37.1kB/s]
4%|4 | 1.72M/41.5M [00:28<14:21, 48.4kB/s]
4%|4 | 1.73M/41.5M [00:29<17:44, 39.2kB/s]
4%|4 | 1.73M/41.5M [00:29<17:05, 40.7kB/s]
4%|4 | 1.74M/41.5M [00:29<16:33, 42.0kB/s]
4%|4 | 1.75M/41.5M [00:29<16:07, 43.0kB/s]
4%|4 | 1.76M/41.5M [00:29<15:48, 43.9kB/s]
4%|4 | 1.77M/41.5M [00:30<16:35, 41.9kB/s]
4%|4 | 1.77M/41.5M [00:30<15:05, 46.0kB/s]
4%|4 | 1.78M/41.5M [00:30<15:02, 46.1kB/s]
4%|4 | 1.79M/41.5M [00:30<15:01, 46.2kB/s]
4%|4 | 1.80M/41.5M [00:30<14:59, 46.3kB/s]
4%|4
| 1.81M/41.5M [00:30<11:33, 60.0kB/s]
4%|4 | 1.82M/41.5M [00:31<12:23, 56.0kB/s]
4%|4 | 1.83M/41.5M [00:31<13:03, 53.1kB/s]
4%|4 | 1.84M/41.5M [00:31<10:40, 64.9kB/s]
4%|4 | 1.85M/41.5M [00:31<11:39, 59.4kB/s]
5%|4 | 1.87M/41.5M [00:31<09:58, 69.4kB/s]
5%|4 | 1.88M/41.5M [00:32<09:03, 76.3kB/s]
5%|4 | 1.89M/41.5M [00:32<10:16, 67.4kB/s]
5%|4 | 1.91M/41.5M [00:32<09:13, 75.0kB/s]
5%|4 | 1.92M/41.5M [00:32<08:36, 80.3kB/s]
5%|4 | 1.93M/41.5M [00:32<09:51, 70.2kB/s]
5%|4 | 1.95M/41.5M [00:32<08:59, 76.8kB/s]
5%|4 | 1.96M/41.5M [00:33<08:27, 81.6kB/s]
5%|4 | 1.98M/41.5M [00:33<10:33, 65.4kB/s]
5%|4 | 2.00M/41.5M [00:33<08:15, 83.6kB/s]
5%|4 | 2.02M/41.5M [00:33<08:01, 86.0kB/s]
5%|4 | 2.03M/41.5M [00:33<07:51, 87.8kB/s]
5%|4 | 2.05M/41.5M [00:34<07:4
3, 89.2kB/s]
5%|4 | 2.06M/41.5M [00:34<07:38, 90.2kB/s]
5%|5 | 2.09M/41.5M [00:34<06:36, 104kB/s]
5%|5 | 2.10M/41.5M [00:34<06:49, 101kB/s]
5%|5 | 2.12M/41.5M [00:34<06:58, 98.5kB/s]
5%|5 | 2.13M/41.5M [00:35<09:12, 74.7kB/s]
5%|5 | 2.16M/41.5M [00:35<07:35, 90.6kB/s]
5%|5 | 2.17M/41.5M [00:35<07:32, 91.1kB/s]
5%|5 | 2.19M/41.5M [00:35<07:30, 91.6kB/s]
5%|5 | 2.20M/41.5M [00:35<07:28, 91.9kB/s]
5%|5 | 2.22M/41.5M [00:36<07:26, 92.2kB/s]
5%|5 | 2.23M/41.5M [00:36<07:25, 92.3kB/s]
5%|5 | 2.26M/41.5M [00:36<06:28, 106kB/s]
5%|5 | 2.27M/41.5M [00:36<08:41, 78.9kB/s]
6%|5 | 2.30M/41.5M [00:36<07:18, 93.6kB/s]
6%|5 | 2.31M/41.5M [00:37<07:19, 93.4kB/s]
6%|5 | 2.33M/41.5M [00:37<07:20, 93.2kB/s]
6%|5 | 2.34M/41.5M [00:37<07:20, 93.1kB/s]
6%|5
| 2.36M/41.5M [00:37<07:21, 93.0kB/s]
6%|5 | 2.38M/41.5M [00:37<07:21, 92.9kB/s]
6%|5 | 2.39M/41.5M [00:38<07:54, 86.3kB/s]
6%|5 | 2.41M/41.5M [00:38<07:12, 94.9kB/s]
6%|5 | 2.42M/41.5M [00:38<07:14, 94.2kB/s]
6%|5 | 2.44M/41.5M [00:38<08:22, 81.6kB/s]
6%|5 | 2.45M/41.5M [00:38<09:09, 74.5kB/s]
6%|5 | 2.46M/41.5M [00:39<12:42, 53.6kB/s]
6%|5 | 2.47M/41.5M [00:39<12:20, 55.3kB/s]
6%|5 | 2.48M/41.5M [00:39<11:13, 60.8kB/s]
6%|6 | 2.49M/41.5M [00:39<11:56, 57.1kB/s]
6%|6 | 2.50M/41.5M [00:39<11:40, 58.3kB/s]
6%|6 | 2.52M/41.5M [00:40<10:43, 63.5kB/s]
6%|6 | 2.52M/41.5M [00:40<11:35, 58.7kB/s]
6%|6 | 2.53M/41.5M [00:40<11:24, 59.6kB/s]
6%|6 | 2.55M/41.5M [00:40<10:32, 64.6kB/s]
6%|6 | 2.55M/41.5M [00:40<10:36, 64.2kB/s]
6%|6 | 2.57M/41.5M [00:40<09:1
9, 73.0kB/s]
6%|6 | 2.59M/41.5M [00:41<08:36, 79.0kB/s]
6%|6 | 2.59M/41.5M [00:41<09:50, 69.1kB/s]
6%|6 | 2.61M/41.5M [00:41<11:36, 58.5kB/s]
6%|6 | 2.63M/41.5M [00:41<08:38, 78.6kB/s]
6%|6 | 2.65M/41.5M [00:42<08:49, 77.0kB/s]
6%|6 | 2.66M/41.5M [00:42<09:10, 74.0kB/s]
6%|6 | 2.66M/41.5M [00:42<10:15, 66.2kB/s]
6%|6 | 2.67M/41.5M [00:42<11:12, 60.6kB/s]
6%|6 | 2.69M/41.5M [00:42<12:32, 54.1kB/s]
6%|6 | 2.70M/41.5M [00:43<13:53, 48.8kB/s]
7%|6 | 2.71M/41.5M [00:43<12:54, 52.5kB/s]
7%|6 | 2.72M/41.5M [00:43<13:16, 51.1kB/s]
7%|6 | 2.73M/41.5M [00:43<14:15, 47.5kB/s]
7%|6 | 2.75M/41.5M [00:44<11:48, 57.4kB/s]
7%|6 | 2.76M/41.5M [00:44<12:21, 54.8kB/s]
7%|6 | 2.77M/41.5M [00:44<12:51, 52.6kB/s]
7%|6 | 2.78M/41.5M [00:44<10:41, 63.3kB/s]
7%|6
| 2.79M/41.5M [00:44<11:31, 58.6kB/s]
7%|6 | 2.80M/41.5M [00:45<09:54, 68.3kB/s]
7%|6 | 2.82M/41.5M [00:45<08:58, 75.3kB/s]
7%|6 | 2.83M/41.5M [00:45<10:06, 66.9kB/s]
7%|6 | 2.84M/41.5M [00:45<09:04, 74.5kB/s]
7%|6 | 2.86M/41.5M [00:45<10:56, 61.7kB/s]
7%|6 | 2.88M/41.5M [00:46<08:21, 80.7kB/s]
7%|6 | 2.90M/41.5M [00:46<08:02, 83.8kB/s]
7%|7 | 2.91M/41.5M [00:46<07:49, 86.2kB/s]
7%|7 | 2.93M/41.5M [00:46<07:39, 87.9kB/s]
7%|7 | 2.95M/41.5M [00:46<07:32, 89.3kB/s]
7%|7 | 2.96M/41.5M [00:47<09:36, 70.1kB/s]
7%|7 | 2.98M/41.5M [00:47<07:45, 86.8kB/s]
7%|7 | 3.00M/41.5M [00:47<08:04, 83.3kB/s]
7%|7 | 3.02M/41.5M [00:47<09:23, 71.6kB/s]
7%|7 | 3.03M/41.5M [00:48<09:15, 72.6kB/s]
7%|7 | 3.04M/41.5M [00:48<10:08, 66.2kB/s]
7%|7 | 3.05M/41.5M [00:48<11:
01, 60.9kB/s]
7%|7 | 3.07M/41.5M [00:48<09:48, 68.5kB/s]
7%|7 | 3.08M/41.5M [00:48<11:17, 59.4kB/s]
7%|7 | 3.09M/41.5M [00:49<11:57, 56.1kB/s]
7%|7 | 3.09M/41.5M [00:49<12:32, 53.5kB/s]
7%|7 | 3.10M/41.5M [00:49<13:00, 51.6kB/s]
7%|7 | 3.11M/41.5M [00:49<13:23, 50.1kB/s]
8%|7 | 3.12M/41.5M [00:49<13:40, 49.0kB/s]
8%|7 | 3.13M/41.5M [00:50<14:00, 47.8kB/s]
8%|7 | 3.15M/41.5M [00:50<11:25, 58.7kB/s]
8%|7 | 3.16M/41.5M [00:50<12:04, 55.5kB/s]
8%|7 | 3.16M/41.5M [00:50<12:35, 53.2kB/s]
8%|7 | 3.18M/41.5M [00:51<13:21, 50.1kB/s]
8%|7 | 3.20M/41.5M [00:51<11:06, 60.2kB/s]
8%|7 | 3.20M/41.5M [00:51<11:47, 56.7kB/s]
8%|7 | 3.21M/41.5M [00:51<12:23, 54.0kB/s]
8%|7 | 3.22M/41.5M [00:51<12:53, 51.9kB/s]
8%|7 | 3.23M/41.5M [00:52<17:01, 39.3kB/s]
8%|7
| 3.25M/41.5M [00:52<10:35, 63.1kB/s]
8%|7 | 3.26M/41.5M [00:52<11:21, 58.9kB/s]
8%|7 | 3.27M/41.5M [00:52<12:27, 53.6kB/s]
8%|7 | 3.29M/41.5M [00:53<10:39, 62.7kB/s]
8%|7 | 3.30M/41.5M [00:53<11:23, 58.6kB/s]
8%|7 | 3.30M/41.5M [00:53<12:02, 55.4kB/s]
8%|8 | 3.32M/41.5M [00:53<11:34, 57.6kB/s]
8%|8 | 3.34M/41.5M [00:53<10:00, 66.6kB/s]
8%|8 | 3.34M/41.5M [00:54<10:53, 61.2kB/s]
8%|8 | 3.35M/41.5M [00:54<11:40, 57.1kB/s]
8%|8 | 3.37M/41.5M [00:54<09:54, 67.2kB/s]
8%|8 | 3.38M/41.5M [00:54<08:55, 74.6kB/s]
8%|8 | 3.40M/41.5M [00:54<08:20, 79.8kB/s]
8%|8 | 3.41M/41.5M [00:54<09:30, 70.0kB/s]
8%|8 | 3.42M/41.5M [00:55<08:40, 76.8kB/s]
8%|8 | 3.44M/41.5M [00:55<08:09, 81.5kB/s]
8%|8 | 3.45M/41.5M [00:55<07:49, 84.9kB/s]
8%|8 | 3.47M/41.5M [00:55<07
:37, 87.2kB/s]
8%|8 | 3.48M/41.5M [00:55<08:34, 77.4kB/s]
8%|8 | 3.49M/41.5M [00:56<09:00, 73.8kB/s]
8%|8 | 3.52M/41.5M [00:56<07:38, 86.9kB/s]
9%|8 | 3.53M/41.5M [00:56<07:29, 88.5kB/s]
9%|8 | 3.55M/41.5M [00:56<09:30, 69.7kB/s]
9%|8 | 3.57M/41.5M [00:56<07:13, 91.8kB/s]
9%|8 | 3.59M/41.5M [00:57<07:39, 86.5kB/s]
9%|8 | 3.60M/41.5M [00:57<07:30, 88.2kB/s]
9%|8 | 3.62M/41.5M [00:57<08:28, 78.1kB/s]
9%|8 | 3.63M/41.5M [00:57<08:04, 81.9kB/s]
9%|8 | 3.65M/41.5M [00:57<07:48, 84.7kB/s]
9%|8 | 3.66M/41.5M [00:58<07:36, 87.0kB/s]
9%|8 | 3.68M/41.5M [00:58<09:34, 69.1kB/s]
9%|8 | 3.70M/41.5M [00:58<08:50, 74.7kB/s]
9%|8 | 3.70M/41.5M [00:58<09:46, 67.5kB/s]
9%|8 | 3.72M/41.5M [00:59<09:27, 69.8kB/s]
9%|8 | 3.73M/41.5M [00:59<09:43, 67.9kB/s]
9%|9
| 3.73M/41.5M [00:59<13:45, 48.0kB/s]
9%|9 | 3.76M/41.5M [00:59<10:32, 62.6kB/s]
9%|9 | 3.77M/41.5M [00:59<09:27, 69.7kB/s]
9%|9 | 3.79M/41.5M [01:00<09:18, 70.7kB/s]
9%|9 | 3.80M/41.5M [01:00<12:05, 54.5kB/s]
9%|9 | 3.82M/41.5M [01:00<09:24, 69.9kB/s]
9%|9 | 3.83M/41.5M [01:00<10:14, 64.3kB/s]
9%|9 | 3.84M/41.5M [01:01<09:11, 71.5kB/s]
9%|9 | 3.85M/41.5M [01:01<10:08, 64.8kB/s]
9%|9 | 3.87M/41.5M [01:01<11:33, 56.8kB/s]
9%|9 | 3.89M/41.5M [01:01<08:37, 76.2kB/s]
9%|9 | 3.91M/41.5M [01:01<08:39, 75.8kB/s]
9%|9 | 3.91M/41.5M [01:02<09:01, 72.8kB/s]
9%|9 | 3.92M/41.5M [01:02<10:01, 65.4kB/s]
9%|9 | 3.94M/41.5M [01:02<08:58, 73.2kB/s]
10%|9 | 3.95M/41.5M [01:02<08:19, 78.8kB/s]
10%|9 | 3.97M/41.5M [01:02<08:27, 77.5kB/s]
10%|9 | 3.98M/41.5M [01:03<0
8:00, 81.8kB/s]
10%|9 | 4.00M/41.5M [01:03<07:42, 84.9kB/s]
10%|9 | 4.02M/41.5M [01:03<07:30, 87.2kB/s]
10%|9 | 4.03M/41.5M [01:03<07:22, 88.8kB/s]
10%|9 | 4.05M/41.5M [01:03<06:45, 96.8kB/s]
10%|9 | 4.06M/41.5M [01:03<06:50, 95.5kB/s]
10%|9 | 4.08M/41.5M [01:04<06:54, 94.7kB/s]
10%|9 | 4.10M/41.5M [01:04<06:02, 108kB/s]
10%|9 | 4.12M/41.5M [01:04<06:18, 104kB/s]
10%|9 | 4.13M/41.5M [01:04<06:30, 100kB/s]
10%|# | 4.16M/41.5M [01:04<05:49, 112kB/s]
10%|# | 4.18M/41.5M [01:04<05:25, 120kB/s]
10%|# | 4.20M/41.5M [01:05<05:10, 126kB/s]
10%|# | 4.23M/41.5M [01:05<05:00, 130kB/s]
10%|# | 4.24M/41.5M [01:05<05:52, 111kB/s]
10%|# | 4.27M/41.5M [01:05<05:05, 128kB/s]
10%|# | 4.29M/41.5M [01:05<05:19, 122kB/s]
10%|# | 4.30M/41.5M [01:06<05:42, 114kB/s]
10%|# | 4
.33M/41.5M [01:06<05:21, 121kB/s]
10%|# | 4.34M/41.5M [01:06<05:21, 121kB/s]
11%|# | 4.37M/41.5M [01:06<05:06, 127kB/s]
11%|# | 4.38M/41.5M [01:06<05:59, 108kB/s]
11%|# | 4.41M/41.5M [01:06<05:09, 126kB/s]
11%|# | 4.43M/41.5M [01:07<05:20, 121kB/s]
11%|# | 4.45M/41.5M [01:07<05:43, 113kB/s]
11%|# | 4.47M/41.5M [01:07<05:21, 121kB/s]
11%|# | 4.48M/41.5M [01:07<06:17, 103kB/s]
11%|# | 4.50M/41.5M [01:07<05:54, 109kB/s]
11%|# | 4.52M/41.5M [01:07<05:27, 118kB/s]
11%|# | 4.54M/41.5M [01:08<05:50, 111kB/s]
11%|# | 4.55M/41.5M [01:08<06:07, 105kB/s]
11%|#1 | 4.57M/41.5M [01:08<06:56, 92.9kB/s]
11%|#1 | 4.59M/41.5M [01:08<06:49, 94.6kB/s]
11%|#1 | 4.60M/41.5M [01:08<06:51, 94.1kB/s]
11%|#1 | 4.62M/41.5M [01:09<07:00, 91.9kB/s]
11%|#1 | 4.63M/41.5M [01:09<06:59, 92.2kB/s]
1
1%|#1 | 4.65M/41.5M [01:09<06:50, 94.2kB/s]
11%|#1 | 4.66M/41.5M [01:09<06:51, 93.7kB/s]
11%|#1 | 4.68M/41.5M [01:09<06:53, 93.4kB/s]
11%|#1 | 4.70M/41.5M [01:09<06:01, 107kB/s]
11%|#1 | 4.71M/41.5M [01:10<05:46, 111kB/s]
11%|#1 | 4.73M/41.5M [01:10<05:34, 115kB/s]
11%|#1 | 4.74M/41.5M [01:10<05:58, 107kB/s]
11%|#1 | 4.76M/41.5M [01:10<06:07, 105kB/s]
12%|#1 | 4.77M/41.5M [01:10<06:21, 101kB/s]
12%|#1 | 4.79M/41.5M [01:10<05:39, 113kB/s]
12%|#1 | 4.80M/41.5M [01:10<05:33, 115kB/s]
12%|#1 | 4.82M/41.5M [01:11<05:13, 123kB/s]
12%|#1 | 4.84M/41.5M [01:11<05:43, 112kB/s]
12%|#1 | 4.85M/41.5M [01:11<05:12, 123kB/s]
12%|#1 | 4.87M/41.5M [01:11<06:35, 97.2kB/s]
12%|#1 | 4.89M/41.5M [01:11<05:48, 110kB/s]
12%|#1 | 4.91M/41.5M [01:11<06:05, 105kB/s]
12%|#1 | 4.92M/41.5M [01:12<06:18
, 101kB/s]
12%|#1 | 4.95M/41.5M [01:12<05:40, 113kB/s]
12%|#1 | 4.97M/41.5M [01:12<05:17, 121kB/s]
12%|#2 | 4.98M/41.5M [01:12<05:40, 112kB/s]
12%|#2 | 5.00M/41.5M [01:12<07:48, 81.7kB/s]
12%|#2 | 5.02M/41.5M [01:13<06:38, 96.0kB/s]
12%|#2 | 5.05M/41.5M [01:13<05:19, 119kB/s]
12%|#2 | 5.07M/41.5M [01:13<05:40, 112kB/s]
12%|#2 | 5.09M/41.5M [01:13<05:57, 107kB/s]
12%|#2 | 5.11M/41.5M [01:13<05:28, 116kB/s]
12%|#2 | 5.12M/41.5M [01:14<06:26, 98.7kB/s]
12%|#2 | 5.15M/41.5M [01:14<05:47, 110kB/s]
12%|#2 | 5.16M/41.5M [01:14<06:02, 105kB/s]
12%|#2 | 5.18M/41.5M [01:14<05:36, 113kB/s]
13%|#2 | 5.20M/41.5M [01:14<05:56, 107kB/s]
13%|#2 | 5.21M/41.5M [01:14<06:11, 102kB/s]
13%|#2 | 5.23M/41.5M [01:15<06:22, 99.5kB/s]
13%|#2 | 5.24M/41.5M [01:15<06:29, 97.5kB/s]
13%|#2 | 5.26M/
41.5M [01:15<06:35, 96.0kB/s]
13%|#2 | 5.27M/41.5M [01:15<05:50, 108kB/s]
13%|#2 | 5.29M/41.5M [01:15<05:39, 112kB/s]
13%|#2 | 5.30M/41.5M [01:15<05:47, 109kB/s]
13%|#2 | 5.32M/41.5M [01:15<05:34, 113kB/s]
13%|#2 | 5.34M/41.5M [01:16<05:06, 124kB/s]
13%|#2 | 5.35M/41.5M [01:16<04:56, 128kB/s]
13%|#2 | 5.37M/41.5M [01:16<05:29, 115kB/s]
13%|#2 | 5.38M/41.5M [01:16<07:23, 85.3kB/s]
13%|#3 | 5.42M/41.5M [01:16<04:58, 127kB/s]
13%|#3 | 5.44M/41.5M [01:16<04:45, 132kB/s]
13%|#3 | 5.45M/41.5M [01:17<05:15, 120kB/s]
13%|#3 | 5.47M/41.5M [01:17<05:03, 124kB/s]
13%|#3 | 5.48M/41.5M [01:17<07:59, 78.7kB/s]
13%|#3 | 5.52M/41.5M [01:17<04:56, 127kB/s]
13%|#3 | 5.55M/41.5M [01:18<05:15, 119kB/s]
13%|#3 | 5.56M/41.5M [01:18<06:34, 95.4kB/s]
13%|#3 | 5.59M/41.5M [01:18<06:24, 97.9kB/s]
14%
|#3 | 5.60M/41.5M [01:18<06:29, 96.6kB/s]
14%|#3 | 5.62M/41.5M [01:18<06:33, 95.6kB/s]
14%|#3 | 5.63M/41.5M [01:19<06:36, 94.9kB/s]
14%|#3 | 5.65M/41.5M [01:19<05:58, 105kB/s]
14%|#3 | 5.66M/41.5M [01:19<05:44, 109kB/s]
14%|#3 | 5.68M/41.5M [01:19<06:01, 104kB/s]
14%|#3 | 5.70M/41.5M [01:19<05:55, 106kB/s]
14%|#3 | 5.71M/41.5M [01:19<05:40, 110kB/s]
14%|#3 | 5.73M/41.5M [01:19<05:59, 104kB/s]
14%|#3 | 5.74M/41.5M [01:20<06:12, 101kB/s]
14%|#3 | 5.76M/41.5M [01:20<05:38, 111kB/s]
14%|#3 | 5.77M/41.5M [01:20<05:29, 114kB/s]
14%|#3 | 5.79M/41.5M [01:20<05:51, 107kB/s]
14%|#3 | 5.80M/41.5M [01:20<05:18, 117kB/s]
14%|#4 | 5.82M/41.5M [01:20<05:27, 114kB/s]
14%|#4 | 5.84M/41.5M [01:20<05:21, 116kB/s]
14%|#4 | 5.85M/41.5M [01:21<04:58, 125kB/s]
14%|#4 | 5.87M/41.5M [01:21<05:01, 12
4kB/s]
14%|#4 | 5.88M/41.5M [01:21<04:47, 130kB/s]
14%|#4 | 5.90M/41.5M [01:21<04:53, 127kB/s]
14%|#4 | 5.91M/41.5M [01:21<07:27, 83.4kB/s]
14%|#4 | 5.95M/41.5M [01:21<04:25, 140kB/s]
14%|#4 | 5.98M/41.5M [01:22<04:45, 130kB/s]
14%|#4 | 6.00M/41.5M [01:22<04:53, 127kB/s]
14%|#4 | 6.02M/41.5M [01:22<04:41, 132kB/s]
15%|#4 | 6.03M/41.5M [01:22<05:10, 120kB/s]
15%|#4 | 6.05M/41.5M [01:22<05:58, 104kB/s]
15%|#4 | 6.06M/41.5M [01:23<06:09, 101kB/s]
15%|#4 | 6.09M/41.5M [01:23<07:09, 86.5kB/s]
15%|#4 | 6.11M/41.5M [01:23<05:52, 105kB/s]
15%|#4 | 6.12M/41.5M [01:23<06:03, 102kB/s]
15%|#4 | 6.14M/41.5M [01:23<06:13, 99.3kB/s]
15%|#4 | 6.16M/41.5M [01:24<06:20, 97.3kB/s]
15%|#4 | 6.17M/41.5M [01:24<10:08, 60.8kB/s]
15%|#4 | 6.20M/41.5M [01:24<07:06, 86.7kB/s]
15%|#4 | 6.22M/41.
5M [01:24<07:44, 79.7kB/s]
15%|#5 | 6.23M/41.5M [01:25<07:27, 82.6kB/s]
15%|#5 | 6.25M/41.5M [01:25<07:14, 85.1kB/s]
15%|#5 | 6.27M/41.5M [01:25<07:04, 87.1kB/s]
15%|#5 | 6.28M/41.5M [01:25<06:56, 88.6kB/s]
15%|#5 | 6.30M/41.5M [01:25<06:50, 89.8kB/s]
15%|#5 | 6.31M/41.5M [01:26<06:46, 90.6kB/s]
15%|#5 | 6.33M/41.5M [01:26<07:47, 78.8kB/s]
15%|#5 | 6.35M/41.5M [01:26<07:16, 84.4kB/s]
15%|#5 | 6.37M/41.5M [01:26<07:05, 86.5kB/s]
15%|#5 | 6.38M/41.5M [01:26<06:57, 88.2kB/s]
15%|#5 | 6.40M/41.5M [01:27<06:51, 89.4kB/s]
15%|#5 | 6.41M/41.5M [01:27<06:46, 90.4kB/s]
15%|#5 | 6.43M/41.5M [01:27<06:43, 91.1kB/s]
16%|#5 | 6.45M/41.5M [01:27<06:41, 91.6kB/s]
16%|#5 | 6.46M/41.5M [01:27<08:36, 71.1kB/s]
16%|#5 | 6.49M/41.5M [01:28<06:10, 99.1kB/s]
16%|#5 | 6.51M/41.5M [01:28<06:17, 97.3kB/s]
16%|#5 | 6.52M/41.5M [01:28<06:21, 96.1kB/s]
16%|#5 | 6.54M/41.5M [01:28<06:24, 95.3kB/s]
16%|#5 | 6.55M/41.5M [01:28<06:27, 94.6kB/s]
16%|#5 | 6.57M/41.5M [01:29<06:29, 94.1kB/s]
16%|#5 | 6.59M/41.5M [01:29<06:30, 93.7kB/s]
16%|#5 | 6.60M/41.5M [01:29<06:31, 93.4kB/s]
16%|#5 | 6.62M/41.5M [01:29<06:32, 93.2kB/s]
16%|#5 | 6.63M/41.5M [01:29<06:32, 93.1kB/s]
16%|#6 | 6.65M/41.5M [01:30<08:30, 71.6kB/s]
16%|#6 | 6.68M/41.5M [01:30<06:05, 99.8kB/s]
16%|#6 | 6.70M/41.5M [01:30<06:12, 97.9kB/s]
16%|#6 | 6.71M/41.5M [01:30<08:01, 75.8kB/s]
16%|#6 | 6.74M/41.5M [01:31<05:59, 101kB/s]
16%|#6 | 6.76M/41.5M [01:31<06:06, 99.3kB/s]
16%|#6 | 6.77M/41.5M [01:31<06:13, 97.5kB/s]
16%|#6 | 6.79M/41.5M [01:31<06:42, 90.4kB/s]
16%|#6 | 6.80M/41.5M [01:31<07:09, 84.6kB/s]
16%|#6 | 6.82M/41
.5M [01:31<06:27, 93.7kB/s]
16%|#6 | 6.84M/41.5M [01:32<06:28, 93.4kB/s]
17%|#6 | 6.85M/41.5M [01:32<08:23, 72.1kB/s]
17%|#6 | 6.88M/41.5M [01:32<06:26, 93.8kB/s]
17%|#6 | 6.89M/41.5M [01:32<07:22, 82.1kB/s]
17%|#6 | 6.91M/41.5M [01:33<07:07, 84.8kB/s]
17%|#6 | 6.93M/41.5M [01:33<06:51, 88.0kB/s]
17%|#6 | 6.95M/41.5M [01:33<06:46, 89.2kB/s]
17%|#6 | 6.96M/41.5M [01:33<06:41, 90.1kB/s]
17%|#6 | 6.98M/41.5M [01:33<06:38, 90.9kB/s]
17%|#6 | 6.99M/41.5M [01:34<06:35, 91.4kB/s]
17%|#6 | 7.02M/41.5M [01:34<07:22, 81.6kB/s]
17%|#6 | 7.05M/41.5M [01:34<05:38, 107kB/s]
17%|#7 | 7.06M/41.5M [01:34<05:50, 103kB/s]
17%|#7 | 7.08M/41.5M [01:34<05:59, 100kB/s]
17%|#7 | 7.09M/41.5M [01:35<06:07, 98.2kB/s]
17%|#7 | 7.11M/41.5M [01:35<07:59, 75.2kB/s]
17%|#7 | 7.13M/41.5M [01:35<06:37, 90.6kB/s]
17%|#7 | 7.15M/41.5M [01:35<06:35, 91.1kB/s]
17%|#7 | 7.16M/41.5M [01:35<06:33, 91.5kB/s]
17%|#7 | 7.18M/41.5M [01:36<06:31, 91.9kB/s]
17%|#7 | 7.20M/41.5M [01:36<06:30, 92.1kB/s]
17%|#7 | 7.21M/41.5M [01:36<06:29, 92.3kB/s]
17%|#7 | 7.23M/41.5M [01:36<06:28, 92.5kB/s]
17%|#7 | 7.24M/41.5M [01:36<06:04, 98.4kB/s]
18%|#7 | 7.27M/41.5M [01:37<05:43, 104kB/s]
18%|#7 | 7.28M/41.5M [01:37<05:55, 101kB/s]
18%|#7 | 7.30M/41.5M [01:37<07:51, 76.1kB/s]
18%|#7 | 7.33M/41.5M [01:37<05:31, 108kB/s]
18%|#7 | 7.34M/41.5M [01:37<06:03, 98.6kB/s]
18%|#7 | 7.36M/41.5M [01:38<07:27, 80.0kB/s]
18%|#7 | 7.38M/41.5M [01:38<07:31, 79.2kB/s]
18%|#7 | 7.39M/41.5M [01:38<08:59, 66.2kB/s]
18%|#7 | 7.41M/41.5M [01:38<08:15, 72.1kB/s]
18%|#7 | 7.41M/41.5M [01:39<11:12, 53.1kB/s]
18%|#7 | 7.44M/41.5
M [01:39<08:17, 71.7kB/s]
18%|#7 | 7.45M/41.5M [01:39<09:35, 62.1kB/s]
18%|#7 | 7.46M/41.5M [01:40<10:07, 58.7kB/s]
18%|#8 | 7.48M/41.5M [01:40<09:25, 63.1kB/s]
18%|#8 | 7.48M/41.5M [01:40<09:28, 62.7kB/s]
18%|#8 | 7.49M/41.5M [01:40<10:10, 58.4kB/s]
18%|#8 | 7.51M/41.5M [01:40<08:44, 67.9kB/s]
18%|#8 | 7.52M/41.5M [01:40<09:36, 61.8kB/s]
18%|#8 | 7.53M/41.5M [01:41<08:23, 70.7kB/s]
18%|#8 | 7.55M/41.5M [01:41<07:41, 77.1kB/s]
18%|#8 | 7.56M/41.5M [01:41<07:15, 81.7kB/s]
18%|#8 | 7.58M/41.5M [01:41<07:27, 79.4kB/s]
18%|#8 | 7.59M/41.5M [01:41<07:53, 75.1kB/s]
18%|#8 | 7.59M/41.5M [01:42<09:34, 61.9kB/s]
18%|#8 | 7.62M/41.5M [01:42<06:34, 90.1kB/s]
18%|#8 | 7.63M/41.5M [01:42<06:30, 90.9kB/s]
18%|#8 | 7.65M/41.5M [01:42<08:25, 70.2kB/s]
18%|#8 | 7.67M/41.5M [01:42<06:45, 87.4kB/s]
19%|#8 | 7.69M/41.5M [01:43<08:24, 70.3kB/s]
19%|#8 | 7.70M/41.5M [01:43<07:49, 75.5kB/s]
19%|#8 | 7.72M/41.5M [01:43<09:13, 64.0kB/s]
19%|#8 | 7.73M/41.5M [01:43<08:23, 70.3kB/s]
19%|#8 | 7.74M/41.5M [01:44<09:07, 64.6kB/s]
19%|#8 | 7.76M/41.5M [01:44<14:48, 39.8kB/s]
19%|#8 | 7.79M/41.5M [01:44<08:57, 65.7kB/s]
19%|#8 | 7.80M/41.5M [01:45<08:17, 71.1kB/s]
19%|#8 | 7.82M/41.5M [01:45<11:10, 52.7kB/s]
19%|#8 | 7.84M/41.5M [01:45<08:37, 68.2kB/s]
19%|#8 | 7.86M/41.5M [01:46<09:40, 60.8kB/s]
19%|#8 | 7.88M/41.5M [01:46<08:45, 67.0kB/s]
19%|#9 | 7.89M/41.5M [01:46<08:05, 72.6kB/s]
19%|#9 | 7.91M/41.5M [01:46<07:59, 73.4kB/s]
19%|#9 | 7.92M/41.5M [01:47<08:54, 65.8kB/s]
19%|#9 | 7.94M/41.5M [01:47<08:09, 71.9kB/s]
19%|#9 | 7.95M/41.5M [01:47<07:36, 77.0kB/s]
19%|#9 | 7.97M/41.
5M [01:47<07:13, 81.1kB/s]
19%|#9 | 7.98M/41.5M [01:47<08:50, 66.2kB/s]
19%|#9 | 8.00M/41.5M [01:48<08:04, 72.4kB/s]
19%|#9 | 8.01M/41.5M [01:48<08:52, 65.9kB/s]
19%|#9 | 8.02M/41.5M [01:48<08:01, 72.9kB/s]
19%|#9 | 8.04M/41.5M [01:48<07:27, 78.4kB/s]
19%|#9 | 8.05M/41.5M [01:49<09:04, 64.3kB/s]
19%|#9 | 8.08M/41.5M [01:49<07:30, 77.8kB/s]
20%|#9 | 8.09M/41.5M [01:49<08:30, 68.6kB/s]
20%|#9 | 8.11M/41.5M [01:49<10:31, 55.4kB/s]
20%|#9 | 8.14M/41.5M [01:50<07:56, 73.4kB/s]
20%|#9 | 8.16M/41.5M [01:50<07:54, 73.7kB/s]
20%|#9 | 8.16M/41.5M [01:50<08:08, 71.5kB/s]
20%|#9 | 8.18M/41.5M [01:50<07:35, 76.7kB/s]
20%|#9 | 8.20M/41.5M [01:51<07:38, 76.2kB/s]
20%|#9 | 8.20M/41.5M [01:51<11:14, 51.7kB/s]
20%|#9 | 8.23M/41.5M [01:52<10:53, 53.4kB/s]
20%|#9 | 8.26M/41.5M [01:52<08:32, 68.0kB/s]
20%|#9 | 8.27M/41.5M [01:52<08:12, 70.7kB/s]
20%|#9 | 8.29M/41.5M [01:52<09:20, 62.1kB/s]
20%|#9 | 8.30M/41.5M [01:52<09:50, 59.0kB/s]
20%|## | 8.30M/41.5M [01:53<09:57, 58.2kB/s]
20%|## | 8.32M/41.5M [01:53<09:01, 64.2kB/s]
20%|## | 8.33M/41.5M [01:53<09:18, 62.3kB/s]
20%|## | 8.34M/41.5M [01:54<17:56, 32.3kB/s]
20%|## | 8.39M/41.5M [01:54<08:07, 71.2kB/s]
20%|## | 8.41M/41.5M [01:54<07:41, 75.1kB/s]
20%|## | 8.42M/41.5M [01:55<08:51, 65.3kB/s]
20%|## | 8.44M/41.5M [01:55<08:10, 70.7kB/s]
20%|## | 8.45M/41.5M [01:55<07:38, 75.5kB/s]
20%|## | 8.47M/41.5M [01:55<07:14, 79.6kB/s]
20%|## | 8.48M/41.5M [01:55<06:57, 82.9kB/s]
20%|## | 8.50M/41.5M [01:55<06:44, 85.6kB/s]
21%|## | 8.52M/41.5M [01:56<06:34, 87.5kB/s]
21%|## | 8.53M/41.5M [01:56<06:28, 89.0kB/s]
21%|## | 8.55M/41
.5M [01:56<06:23, 90.1kB/s]
21%|## | 8.56M/41.5M [01:56<06:19, 90.9kB/s]
21%|## | 8.58M/41.5M [01:56<06:17, 91.4kB/s]
21%|## | 8.59M/41.5M [01:56<06:15, 91.8kB/s]
21%|## | 8.61M/41.5M [01:57<06:14, 92.1kB/s]
21%|## | 8.62M/41.5M [01:57<08:05, 71.0kB/s]
21%|## | 8.65M/41.5M [01:57<06:32, 87.8kB/s]
21%|## | 8.66M/41.5M [01:57<06:26, 89.1kB/s]
21%|## | 8.68M/41.5M [01:58<06:21, 90.1kB/s]
21%|## | 8.70M/41.5M [01:58<05:31, 104kB/s]
21%|##1 | 8.72M/41.5M [01:58<07:42, 74.3kB/s]
21%|##1 | 8.76M/41.5M [01:58<04:52, 117kB/s]
21%|##1 | 8.77M/41.5M [01:58<05:09, 111kB/s]
21%|##1 | 8.79M/41.5M [01:59<05:23, 106kB/s]
21%|##1 | 8.80M/41.5M [01:59<05:35, 102kB/s]
21%|##1 | 8.82M/41.5M [01:59<05:43, 99.6kB/s]
21%|##1 | 8.84M/41.5M [01:59<05:50, 97.6kB/s]
21%|##1 | 8.86M/41.5M [01:59<05:11, 110kB/s]
21%|##1 | 8.88M/41.5M [01:59<05:26, 105kB/s]
21%|##1 | 8.90M/41.5M [02:00<04:57, 115kB/s]
21%|##1 | 8.91M/41.5M [02:00<08:23, 67.8kB/s]
22%|##1 | 8.97M/41.5M [02:00<05:07, 111kB/s]
22%|##1 | 8.98M/41.5M [02:01<06:27, 88.0kB/s]
22%|##1 | 9.01M/41.5M [02:01<05:46, 98.3kB/s]
22%|##1 | 9.02M/41.5M [02:01<05:50, 97.1kB/s]
22%|##1 | 9.04M/41.5M [02:01<05:54, 96.0kB/s]
22%|##1 | 9.05M/41.5M [02:02<05:57, 95.2kB/s]
22%|##1 | 9.07M/41.5M [02:02<05:59, 94.5kB/s]
22%|##1 | 9.09M/41.5M [02:02<06:01, 94.0kB/s]
22%|##1 | 9.10M/41.5M [02:02<06:02, 93.7kB/s]
22%|##1 | 9.12M/41.5M [02:02<06:03, 93.4kB/s]
22%|##2 | 9.13M/41.5M [02:02<06:03, 93.2kB/s]
22%|##2 | 9.15M/41.5M [02:03<06:04, 93.1kB/s]
22%|##2 | 9.16M/41.5M [02:03<06:04, 93.0kB/s]
22%|##2 | 9.18M/41.5M [02:03<05:42, 99.0kB/s]
22%|##2 | 9.20M/41.5M
[02:03<05:48, 97.0kB/s]
22%|##2 | 9.21M/41.5M [02:03<08:05, 69.7kB/s]
22%|##2 | 9.25M/41.5M [02:04<04:55, 115kB/s]
22%|##2 | 9.27M/41.5M [02:04<05:10, 109kB/s]
22%|##2 | 9.29M/41.5M [02:04<04:48, 117kB/s]
22%|##2 | 9.30M/41.5M [02:04<05:06, 110kB/s]
22%|##2 | 9.33M/41.5M [02:04<04:44, 119kB/s]
23%|##2 | 9.34M/41.5M [02:04<05:03, 111kB/s]
23%|##2 | 9.37M/41.5M [02:05<04:42, 119kB/s]
23%|##2 | 9.39M/41.5M [02:05<04:29, 125kB/s]
23%|##2 | 9.41M/41.5M [02:05<04:51, 116kB/s]
23%|##2 | 9.43M/41.5M [02:05<04:34, 123kB/s]
23%|##2 | 9.45M/41.5M [02:05<04:23, 128kB/s]
23%|##2 | 9.47M/41.5M [02:06<06:51, 81.7kB/s]
23%|##2 | 9.52M/41.5M [02:06<04:09, 134kB/s]
23%|##2 | 9.54M/41.5M [02:06<04:57, 112kB/s]
23%|##3 | 9.55M/41.5M [02:06<05:10, 108kB/s]
23%|##3 | 9.58M/41.5M [02:07<07:14, 77.0kB/s]
23%|##3
| 9.63M/41.5M [02:07<04:38, 120kB/s]
23%|##3 | 9.65M/41.5M [02:07<04:39, 119kB/s]
23%|##3 | 9.66M/41.5M [02:07<04:55, 113kB/s]
23%|##3 | 9.68M/41.5M [02:08<05:09, 108kB/s]
23%|##3 | 9.70M/41.5M [02:08<05:21, 104kB/s]
23%|##3 | 9.72M/41.5M [02:08<05:09, 108kB/s]
23%|##3 | 9.73M/41.5M [02:08<05:21, 104kB/s]
23%|##3 | 9.75M/41.5M [02:08<05:12, 107kB/s]
24%|##3 | 9.77M/41.5M [02:09<05:03, 110kB/s]
24%|##3 | 9.79M/41.5M [02:09<04:58, 111kB/s]
24%|##3 | 9.80M/41.5M [02:09<07:09, 77.4kB/s]
24%|##3 | 9.84M/41.5M [02:09<05:05, 108kB/s]
24%|##3 | 9.85M/41.5M [02:09<05:18, 104kB/s]
24%|##3 | 9.87M/41.5M [02:10<05:27, 101kB/s]
24%|##3 | 9.89M/41.5M [02:10<04:56, 112kB/s]
24%|##3 | 9.91M/41.5M [02:10<05:11, 106kB/s]
24%|##3 | 9.92M/41.5M [02:10<06:57, 79.2kB/s]
24%|##4 | 9.96M/41.5M [02:11<04:57, 111kB/s]
24%|##4 | 9.98M/41.5M [02:11<05:10, 107kB/s]
24%|##4 | 10.0M/41.5M [02:11<04:46, 115kB/s]
24%|##4 | 10.0M/41.5M [02:11<06:25, 85.5kB/s]
24%|##4 | 10.0M/41.5M [02:11<05:02, 109kB/s]
24%|##4 | 10.1M/41.5M [02:12<05:14, 105kB/s]
24%|##4 | 10.1M/41.5M [02:12<05:23, 102kB/s]
24%|##4 | 10.1M/41.5M [02:12<08:33, 64.1kB/s]
24%|##4 | 10.1M/41.5M [02:12<05:35, 97.9kB/s]
24%|##4 | 10.1M/41.5M [02:13<06:56, 79.0kB/s]
24%|##4 | 10.2M/41.5M [02:13<06:40, 81.9kB/s]
25%|##4 | 10.2M/41.5M [02:13<06:29, 84.4kB/s]
25%|##4 | 10.2M/41.5M [02:14<07:52, 69.4kB/s]
25%|##4 | 10.2M/41.5M [02:14<10:58, 49.8kB/s]
25%|##4 | 10.2M/41.5M [02:14<08:00, 68.2kB/s]
25%|##4 | 10.2M/41.5M [02:15<08:12, 66.6kB/s]
25%|##4 | 10.3M/41.5M [02:15<08:23, 65.1kB/s]
25%|##4 | 10.3M/41.5M [02:15<11:46, 46.3kB/s]
25%|##4 | 10.3M/41.5M
[02:16<08:30, 64.0kB/s]
25%|##4 | 10.3M/41.5M [02:16<07:52, 69.1kB/s]
25%|##4 | 10.3M/41.5M [02:16<08:51, 61.4kB/s]
25%|##4 | 10.4M/41.5M [02:16<08:03, 67.5kB/s]
25%|##5 | 10.4M/41.5M [02:17<07:26, 73.0kB/s]
25%|##5 | 10.4M/41.5M [02:17<10:33, 51.5kB/s]
25%|##5 | 10.4M/41.5M [02:17<06:57, 78.0kB/s]
25%|##5 | 10.4M/41.5M [02:18<08:17, 65.4kB/s]
25%|##5 | 10.5M/41.5M [02:18<08:58, 60.4kB/s]
25%|##5 | 10.5M/41.5M [02:18<09:23, 57.7kB/s]
25%|##5 | 10.5M/41.5M [02:18<09:48, 55.3kB/s]
25%|##5 | 10.5M/41.5M [02:18<10:11, 53.2kB/s]
25%|##5 | 10.5M/41.5M [02:19<08:34, 63.1kB/s]
25%|##5 | 10.5M/41.5M [02:19<09:13, 58.7kB/s]
25%|##5 | 10.5M/41.5M [02:19<09:47, 55.3kB/s]
25%|##5 | 10.5M/41.5M [02:19<08:13, 65.9kB/s]
25%|##5 | 10.5M/41.5M [02:19<08:58, 60.3kB/s]
25%|##5 | 10.5M/41.5M [02:20<07:45, 69.7kB/s]
25%|##5 | 10.6M/41.5M [02:20<08:35, 62.9kB/s]
25%|##5 | 10.6M/41.5M [02:20<07:32, 71.7kB/s]
26%|##5 | 10.6M/41.5M [02:20<08:59, 60.1kB/s]
26%|##5 | 10.6M/41.5M [02:20<06:46, 79.6kB/s]
26%|##5 | 10.6M/41.5M [02:21<06:29, 83.0kB/s]
26%|##5 | 10.6M/41.5M [02:21<08:00, 67.4kB/s]
26%|##5 | 10.7M/41.5M [02:21<07:21, 73.3kB/s]
26%|##5 | 10.7M/41.5M [02:21<06:53, 78.2kB/s]
26%|##5 | 10.7M/41.5M [02:22<08:18, 64.8kB/s]
26%|##5 | 10.7M/41.5M [02:22<06:34, 81.9kB/s]
26%|##5 | 10.7M/41.5M [02:22<09:51, 54.5kB/s]
26%|##5 | 10.8M/41.5M [02:23<07:39, 70.1kB/s]
26%|##5 | 10.8M/41.5M [02:23<08:40, 61.9kB/s]
26%|##5 | 10.8M/41.5M [02:23<08:41, 61.7kB/s]
26%|##6 | 10.8M/41.5M [02:23<08:13, 65.3kB/s]
26%|##6 | 10.8M/41.5M [02:23<08:19, 64.4kB/s]
26%|##6 | 10.8M/41.5M [02:24<07:55, 67.7kB/s]
26%|##6 | 10.8M/41.5
M [02:24<08:05, 66.2kB/s]
26%|##6 | 10.8M/41.5M [02:24<07:45, 69.1kB/s]
26%|##6 | 10.8M/41.5M [02:24<07:58, 67.2kB/s]
26%|##6 | 10.9M/41.5M [02:24<07:09, 74.8kB/s]
26%|##6 | 10.9M/41.5M [02:24<06:40, 80.1kB/s]
26%|##6 | 10.9M/41.5M [02:25<06:22, 83.9kB/s]
26%|##6 | 10.9M/41.5M [02:25<06:10, 86.6kB/s]
26%|##6 | 10.9M/41.5M [02:25<05:35, 95.5kB/s]
26%|##6 | 10.9M/41.5M [02:25<06:51, 77.8kB/s]
26%|##6 | 11.0M/41.5M [02:26<06:03, 88.0kB/s]
26%|##6 | 11.0M/41.5M [02:26<07:29, 71.2kB/s]
27%|##6 | 11.0M/41.5M [02:26<06:09, 86.4kB/s]
27%|##6 | 11.0M/41.5M [02:26<07:31, 70.8kB/s]
27%|##6 | 11.0M/41.5M [02:27<08:07, 65.5kB/s]
27%|##6 | 11.0M/41.5M [02:27<08:43, 60.9kB/s]
27%|##6 | 11.0M/41.5M [02:27<09:17, 57.3kB/s]
27%|##6 | 11.1M/41.5M [02:27<07:58, 66.7kB/s]
27%|##6 | 11.1M/41.5M [02:27<08:42, 61.1kB/s]
27%|##6 | 11.1M/41.5M [02:28<09:19, 57.0kB/s]
27%|##6 | 11.1M/41.5M [02:28<10:10, 52.2kB/s]
27%|##6 | 11.1M/41.5M [02:28<08:33, 62.0kB/s]
27%|##6 | 11.1M/41.5M [02:28<09:09, 58.0kB/s]
27%|##6 | 11.1M/41.5M [02:28<09:40, 54.9kB/s]
27%|##6 | 11.1M/41.5M [02:29<08:06, 65.4kB/s]
27%|##6 | 11.1M/41.5M [02:29<08:50, 60.0kB/s]
27%|##6 | 11.2M/41.5M [02:29<07:38, 69.4kB/s]
27%|##6 | 11.2M/41.5M [02:29<08:26, 62.7kB/s]
27%|##6 | 11.2M/41.5M [02:29<09:08, 57.9kB/s]
27%|##6 | 11.2M/41.5M [02:29<07:45, 68.2kB/s]
27%|##7 | 11.2M/41.5M [02:30<07:00, 75.5kB/s]
27%|##7 | 11.2M/41.5M [02:30<06:33, 80.7kB/s]
27%|##7 | 11.2M/41.5M [02:30<06:16, 84.3kB/s]
27%|##7 | 11.3M/41.5M [02:30<06:05, 86.8kB/s]
27%|##7 | 11.3M/41.5M [02:30<05:57, 88.6kB/s]
27%|##7 | 11.3M/41.5M [02:31<05:52, 89.8kB/s]
27%|##7 | 11.3M/41.
5M [02:31<05:48, 90.7kB/s]
27%|##7 | 11.3M/41.5M [02:31<05:46, 91.3kB/s]
27%|##7 | 11.3M/41.5M [02:31<05:44, 91.7kB/s]
27%|##7 | 11.4M/41.5M [02:31<04:58, 106kB/s]
27%|##7 | 11.4M/41.5M [02:31<05:09, 102kB/s]
27%|##7 | 11.4M/41.5M [02:32<05:18, 99.2kB/s]
28%|##7 | 11.4M/41.5M [02:32<06:08, 85.5kB/s]
28%|##7 | 11.4M/41.5M [02:32<05:19, 98.7kB/s]
28%|##7 | 11.5M/41.5M [02:32<05:24, 97.0kB/s]
28%|##7 | 11.5M/41.5M [02:33<06:57, 75.5kB/s]
28%|##7 | 11.5M/41.5M [02:33<06:36, 79.3kB/s]
28%|##7 | 11.5M/41.5M [02:33<06:20, 82.7kB/s]
28%|##7 | 11.5M/41.5M [02:34<09:21, 56.0kB/s]
28%|##7 | 11.5M/41.5M [02:34<07:14, 72.3kB/s]
28%|##7 | 11.6M/41.5M [02:34<07:10, 72.9kB/s]
28%|##7 | 11.6M/41.5M [02:34<07:55, 66.0kB/s]
28%|##7 | 11.6M/41.5M [02:34<07:16, 71.8kB/s]
28%|##7 | 11.6M/41.5M [02:35<06:48, 76.8kB/s]
28%|##8 | 11.6M/41.5M [02:35<06:51, 76.2kB/s]
28%|##8 | 11.6M/41.5M [02:35<07:08, 73.1kB/s]
28%|##8 | 11.6M/41.5M [02:35<06:38, 78.6kB/s]
28%|##8 | 11.7M/41.5M [02:35<06:18, 82.6kB/s]
28%|##8 | 11.7M/41.5M [02:35<06:05, 85.6kB/s]
28%|##8 | 11.7M/41.5M [02:36<05:56, 87.7kB/s]
28%|##8 | 11.7M/41.5M [02:36<05:03, 103kB/s]
28%|##8 | 11.7M/41.5M [02:36<07:07, 73.0kB/s]
28%|##8 | 11.8M/41.5M [02:36<04:55, 105kB/s]
28%|##8 | 11.8M/41.5M [02:37<05:24, 96.0kB/s]
28%|##8 | 11.8M/41.5M [02:37<05:27, 95.2kB/s]
28%|##8 | 11.8M/41.5M [02:37<06:37, 78.3kB/s]
29%|##8 | 11.8M/41.5M [02:37<05:34, 93.1kB/s]
29%|##8 | 11.8M/41.5M [02:38<08:26, 61.4kB/s]
29%|##8 | 11.9M/41.5M [02:38<06:25, 80.6kB/s]
29%|##8 | 11.9M/41.5M [02:38<06:28, 79.9kB/s]
29%|##8 | 11.9M/41.5M [02:38<06:14, 82.7kB/s]
29%|##8 | 11.9M/41.
5M [02:39<06:04, 85.2kB/s]
29%|##8 | 11.9M/41.5M [02:39<07:08, 72.2kB/s]
29%|##8 | 12.0M/41.5M [02:39<07:00, 73.7kB/s]
29%|##8 | 12.0M/41.5M [02:39<06:35, 78.3kB/s]
29%|##8 | 12.0M/41.5M [02:39<06:17, 82.0kB/s]
29%|##8 | 12.0M/41.5M [02:40<07:42, 66.9kB/s]
29%|##8 | 12.0M/41.5M [02:40<05:52, 87.5kB/s]
29%|##9 | 12.0M/41.5M [02:40<07:16, 70.7kB/s]
29%|##9 | 12.1M/41.5M [02:40<06:47, 75.7kB/s]
29%|##9 | 12.1M/41.5M [02:41<06:44, 76.2kB/s]
29%|##9 | 12.1M/41.5M [02:41<07:40, 66.9kB/s]
29%|##9 | 12.1M/41.5M [02:41<07:03, 72.8kB/s]
29%|##9 | 12.1M/41.5M [02:41<07:44, 66.3kB/s]
29%|##9 | 12.1M/41.5M [02:42<07:00, 73.2kB/s]
29%|##9 | 12.1M/41.5M [02:42<06:32, 78.5kB/s]
29%|##9 | 12.1M/41.5M [02:42<09:25, 54.4kB/s]
29%|##9 | 12.2M/41.5M [02:42<05:59, 85.5kB/s]
29%|##9 | 12.2M/41.5M [02:43<08:49, 58.0kB/s]
29%|##9 | 12.2M/41.5M [02:43<06:12, 82.5kB/s]
30%|##9 | 12.2M/41.5M [02:43<06:40, 76.6kB/s]
30%|##9 | 12.3M/41.5M [02:44<07:05, 72.1kB/s]
30%|##9 | 12.3M/41.5M [02:44<08:30, 60.0kB/s]
30%|##9 | 12.3M/41.5M [02:44<07:17, 70.0kB/s]
30%|##9 | 12.3M/41.5M [02:44<06:47, 75.2kB/s]
30%|##9 | 12.3M/41.5M [02:45<07:59, 63.8kB/s]
30%|##9 | 12.3M/41.5M [02:45<08:51, 57.5kB/s]
30%|##9 | 12.3M/41.5M [02:45<09:12, 55.3kB/s]
30%|##9 | 12.4M/41.5M [02:45<07:58, 63.8kB/s]
30%|##9 | 12.4M/41.5M [02:45<08:32, 59.5kB/s]
30%|##9 | 12.4M/41.5M [02:46<07:27, 68.1kB/s]
30%|##9 | 12.4M/41.5M [02:46<06:48, 74.6kB/s]
30%|##9 | 12.4M/41.5M [02:46<07:35, 66.9kB/s]
30%|##9 | 12.4M/41.5M [02:46<06:51, 74.1kB/s]
30%|##9 | 12.4M/41.5M [02:47<08:13, 61.7kB/s]
30%|### | 12.5M/41.5M [02:47<06:40, 76.0kB/s]
30%|### | 12.5M/41
.5M [02:47<06:56, 73.1kB/s]
30%|### | 12.5M/41.5M [02:47<07:42, 65.8kB/s]
30%|### | 12.5M/41.5M [02:47<08:50, 57.3kB/s]
30%|### | 12.5M/41.5M [02:48<06:35, 76.8kB/s]
30%|### | 12.5M/41.5M [02:48<06:39, 76.1kB/s]
30%|### | 12.5M/41.5M [02:48<06:55, 73.1kB/s]
30%|### | 12.5M/41.5M [02:48<07:42, 65.6kB/s]
30%|### | 12.6M/41.5M [02:48<06:53, 73.4kB/s]
30%|### | 12.6M/41.5M [02:48<06:23, 79.0kB/s]
30%|### | 12.6M/41.5M [02:49<08:18, 60.8kB/s]
30%|### | 12.6M/41.5M [02:49<06:00, 83.9kB/s]
30%|### | 12.6M/41.5M [02:49<06:35, 76.4kB/s]
30%|### | 12.6M/41.5M [02:49<06:15, 80.5kB/s]
31%|### | 12.7M/41.5M [02:50<06:49, 73.8kB/s]
31%|### | 12.7M/41.5M [02:50<06:24, 78.5kB/s]
31%|### | 12.7M/41.5M [02:50<06:07, 82.3kB/s]
31%|### | 12.7M/41.5M [02:50<06:17, 80.0kB/s]
31%|### | 12.7M/41.5M [02:50<06:26, 78.1kB/s]
31%|### | 12.7M/41.5M [02:51<05:43, 87.9kB/s]
31%|### | 12.8M/41.5M [02:51<06:02, 83.1kB/s]
31%|### | 12.8M/41.5M [02:51<05:50, 85.8kB/s]
31%|### | 12.8M/41.5M [02:51<05:42, 87.8kB/s]
31%|### | 12.8M/41.5M [02:51<05:12, 96.3kB/s]
31%|### | 12.8M/41.5M [02:51<05:15, 95.2kB/s]
31%|### | 12.8M/41.5M [02:52<05:43, 87.4kB/s]
31%|### | 12.9M/41.5M [02:52<04:53, 102kB/s]
31%|###1 | 12.9M/41.5M [02:52<05:21, 93.3kB/s]
31%|###1 | 12.9M/41.5M [02:52<06:52, 72.7kB/s]
31%|###1 | 12.9M/41.5M [02:53<05:37, 88.7kB/s]
31%|###1 | 12.9M/41.5M [02:53<05:34, 89.6kB/s]
31%|###1 | 13.0M/41.5M [02:53<06:54, 72.3kB/s]
31%|###1 | 13.0M/41.5M [02:54<08:18, 60.0kB/s]
31%|###1 | 13.0M/41.5M [02:54<07:29, 66.5kB/s]
31%|###1 | 13.0M/41.5M [02:54<08:01, 62.1kB/s]
31%|###1 | 13.0M/41.5M [02:54<08:32, 58.3kB/s]
31%|###1 | 13.0M/4
1.5M [02:54<08:25, 59.1kB/s]
31%|###1 | 13.0M/41.5M [02:55<08:57, 55.6kB/s]
31%|###1 | 13.0M/41.5M [02:55<09:23, 52.9kB/s]
31%|###1 | 13.0M/41.5M [02:55<12:34, 39.6kB/s]
31%|###1 | 13.0M/41.5M [02:55<09:28, 52.5kB/s]
31%|###1 | 13.1M/41.5M [02:55<09:45, 50.9kB/s]
32%|###1 | 13.1M/41.5M [02:56<08:33, 58.1kB/s]
32%|###1 | 13.1M/41.5M [02:56<09:01, 55.0kB/s]
32%|###1 | 13.1M/41.5M [02:56<08:45, 56.7kB/s]
32%|###1 | 13.1M/41.5M [02:56<08:25, 58.8kB/s]
32%|###1 | 13.1M/41.5M [02:56<08:23, 59.1kB/s]
32%|###1 | 13.1M/41.5M [02:56<08:16, 60.0kB/s]
32%|###1 | 13.1M/41.5M [02:57<08:22, 59.1kB/s]
32%|###1 | 13.1M/41.5M [02:57<07:05, 69.9kB/s]
32%|###1 | 13.1M/41.5M [02:57<07:06, 69.7kB/s]
32%|###1 | 13.2M/41.5M [02:57<07:05, 69.8kB/s]
32%|###1 | 13.2M/41.5M [02:57<08:57, 55.3kB/s]
32%|###1 | 13.2M/41.5M [02:58<09:19, 53.0kB/s]
32%|###1 | 13.2M/41.5M [02:58<10:33, 46.8kB/s]
32%|###1 | 13.2M/41.5M [02:58<07:25, 66.5kB/s]
32%|###1 | 13.2M/41.5M [02:58<07:59, 61.8kB/s]
32%|###1 | 13.2M/41.5M [02:59<08:31, 58.0kB/s]
32%|###1 | 13.3M/41.5M [02:59<07:21, 67.0kB/s]
32%|###1 | 13.3M/41.5M [02:59<12:25, 39.7kB/s]
32%|###2 | 13.3M/41.5M [03:00<08:15, 59.7kB/s]
32%|###2 | 13.3M/41.5M [03:00<08:40, 56.7kB/s]
32%|###2 | 13.3M/41.5M [03:00<09:04, 54.2kB/s]
32%|###2 | 13.3M/41.5M [03:00<07:40, 64.1kB/s]
32%|###2 | 13.3M/41.5M [03:00<08:17, 59.4kB/s]
32%|###2 | 13.3M/41.5M [03:01<09:10, 53.6kB/s]
32%|###2 | 13.4M/41.5M [03:01<07:48, 62.9kB/s]
32%|###2 | 13.4M/41.5M [03:01<08:22, 58.7kB/s]
32%|###2 | 13.4M/41.5M [03:01<08:51, 55.5kB/s]
32%|###2 | 13.4M/41.5M [03:02<11:34, 42.4kB/s]
32%|###2 | 13.4M/41.5M [03:02<07:55, 62.0kB/s]
32%|###2 | 13.4M/
41.5M [03:02<08:23, 58.5kB/s]
32%|###2 | 13.4M/41.5M [03:02<07:20, 66.8kB/s]
32%|###2 | 13.4M/41.5M [03:02<07:57, 61.5kB/s]
32%|###2 | 13.5M/41.5M [03:03<10:46, 45.5kB/s]
33%|###2 | 13.5M/41.5M [03:03<07:37, 64.1kB/s]
33%|###2 | 13.5M/41.5M [03:03<08:07, 60.2kB/s]
33%|###2 | 13.5M/41.5M [03:03<08:36, 56.9kB/s]
33%|###2 | 13.5M/41.5M [03:04<07:24, 66.0kB/s]
33%|###2 | 13.5M/41.5M [03:04<10:12, 47.9kB/s]
33%|###2 | 13.5M/41.5M [03:04<07:04, 69.0kB/s]
33%|###2 | 13.6M/41.5M [03:04<07:41, 63.4kB/s]
33%|###2 | 13.6M/41.5M [03:04<08:16, 58.9kB/s]
33%|###2 | 13.6M/41.5M [03:05<08:47, 55.5kB/s]
33%|###2 | 13.6M/41.5M [03:05<09:12, 53.0kB/s]
33%|###2 | 13.6M/41.5M [03:05<07:34, 64.3kB/s]
33%|###2 | 13.6M/41.5M [03:05<08:14, 59.1kB/s]
33%|###2 | 13.6M/41.5M [03:05<08:47, 55.4kB/s]
33%|###2 | 13.6M/41.5M [03:06<07:19, 66.4kB/s
]
33%|###2 | 13.6M/41.5M [03:06<08:03, 60.5kB/s]
33%|###2 | 13.6M/41.5M [03:06<06:56, 70.1kB/s]
33%|###2 | 13.7M/41.5M [03:06<06:19, 76.8kB/s]
33%|###2 | 13.7M/41.5M [03:06<07:44, 62.8kB/s]
33%|###3 | 13.7M/41.5M [03:07<05:56, 81.7kB/s]
33%|###3 | 13.7M/41.5M [03:07<05:44, 84.6kB/s]
33%|###3 | 13.7M/41.5M [03:07<05:35, 86.8kB/s]
33%|###3 | 13.8M/41.5M [03:07<05:28, 88.5kB/s]
33%|###3 | 13.8M/41.5M [03:07<05:24, 89.7kB/s]
33%|###3 | 13.8M/41.5M [03:07<05:20, 90.6kB/s]
33%|###3 | 13.8M/41.5M [03:08<05:18, 91.2kB/s]
33%|###3 | 13.8M/41.5M [03:08<05:16, 91.7kB/s]
33%|###3 | 13.8M/41.5M [03:08<05:15, 92.0kB/s]
33%|###3 | 13.8M/41.5M [03:08<05:14, 92.2kB/s]
33%|###3 | 13.9M/41.5M [03:08<05:13, 92.4kB/s]
33%|###3 | 13.9M/41.5M [03:09<06:46, 71.2kB/s]
33%|###3 | 13.9M/41.5M [03:09<05:29, 87.9kB/s]
34%|###3 | 13.9M
/41.5M [03:09<05:24, 89.2kB/s]
34%|###3 | 13.9M/41.5M [03:09<05:20, 90.1kB/s]
34%|###3 | 13.9M/41.5M [03:09<05:17, 90.9kB/s]
34%|###3 | 14.0M/41.5M [03:10<05:15, 91.4kB/s]
34%|###3 | 14.0M/41.5M [03:10<05:35, 85.9kB/s]
34%|###3 | 14.0M/41.5M [03:10<05:07, 93.9kB/s]
34%|###3 | 14.0M/41.5M [03:10<05:08, 93.6kB/s]
34%|###3 | 14.0M/41.5M [03:10<05:08, 93.3kB/s]
34%|###3 | 14.0M/41.5M [03:10<05:09, 93.1kB/s]
34%|###3 | 14.1M/41.5M [03:11<05:09, 93.0kB/s]
34%|###3 | 14.1M/41.5M [03:11<04:29, 107kB/s]
34%|###3 | 14.1M/41.5M [03:11<04:39, 103kB/s]
34%|###4 | 14.1M/41.5M [03:11<04:48, 99.7kB/s]
34%|###4 | 14.1M/41.5M [03:11<04:17, 111kB/s]
34%|###4 | 14.1M/41.5M [03:12<04:30, 106kB/s]
34%|###4 | 14.2M/41.5M [03:12<05:00, 95.3kB/s]
34%|###4 | 14.2M/41.5M [03:12<04:43, 101kB/s]
34%|###4 | 14.2M/41.5M [03:12<04:50, 98.6kB/s]
34%|###4 | 14.2M/41.5M [03:12<06:25, 74.2kB/s]
34%|###4 | 14.2M/41.5M [03:13<07:54, 60.3kB/s]
34%|###4 | 14.3M/41.5M [03:13<05:38, 84.3kB/s]
34%|###4 | 14.3M/41.5M [03:13<05:48, 81.9kB/s]
34%|###4 | 14.3M/41.5M [03:14<06:36, 72.0kB/s]
34%|###4 | 14.3M/41.5M [03:14<08:54, 53.3kB/s]
35%|###4 | 14.3M/41.5M [03:14<06:10, 76.8kB/s]
35%|###4 | 14.4M/41.5M [03:15<08:36, 55.1kB/s]
35%|###4 | 14.4M/41.5M [03:15<06:36, 71.7kB/s]
35%|###4 | 14.4M/41.5M [03:15<06:53, 68.8kB/s]
35%|###4 | 14.4M/41.5M [03:16<08:39, 54.7kB/s]
35%|###4 | 14.4M/41.5M [03:16<06:30, 72.6kB/s]
35%|###4 | 14.5M/41.5M [03:16<07:26, 63.5kB/s]
35%|###4 | 14.5M/41.5M [03:17<06:48, 69.3kB/s]
35%|###4 | 14.5M/41.5M [03:17<06:20, 74.5kB/s]
35%|###4 | 14.5M/41.5M [03:17<05:58, 78.9kB/s]
35%|###4 | 14.5M/41.5M [03:17<07:10, 65.7kB/s]
35%|###5 | 14.5M/
41.5M [03:17<06:33, 71.8kB/s]
35%|###5 | 14.5M/41.5M [03:18<06:07, 76.9kB/s]
35%|###5 | 14.6M/41.5M [03:18<05:48, 81.0kB/s]
35%|###5 | 14.6M/41.5M [03:18<05:35, 84.1kB/s]
35%|###5 | 14.6M/41.5M [03:18<05:25, 86.5kB/s]
35%|###5 | 14.6M/41.5M [03:18<06:50, 68.7kB/s]
35%|###5 | 14.6M/41.5M [03:19<04:51, 96.7kB/s]
35%|###5 | 14.7M/41.5M [03:19<04:54, 95.7kB/s]
35%|###5 | 14.7M/41.5M [03:19<04:56, 94.9kB/s]
35%|###5 | 14.7M/41.5M [03:19<06:20, 73.8kB/s]
35%|###5 | 14.7M/41.5M [03:20<06:28, 72.2kB/s]
36%|###5 | 14.7M/41.5M [03:20<05:24, 86.5kB/s]
36%|###5 | 14.8M/41.5M [03:20<05:18, 88.0kB/s]
36%|###5 | 14.8M/41.5M [03:20<05:14, 89.2kB/s]
36%|###5 | 14.8M/41.5M [03:21<06:32, 71.3kB/s]
36%|###5 | 14.8M/41.5M [03:21<07:07, 65.5kB/s]
36%|###5 | 14.8M/41.5M [03:21<08:03, 57.9kB/s]
36%|###5 | 14.8M/41.5M [03:22<08:25, 55.3kB/s
]
36%|###5 | 14.9M/41.5M [03:22<07:31, 61.9kB/s]
36%|###5 | 14.9M/41.5M [03:22<07:51, 59.1kB/s]
36%|###5 | 14.9M/41.5M [03:22<08:13, 56.6kB/s]
36%|###5 | 14.9M/41.5M [03:23<07:11, 64.6kB/s]
36%|###5 | 14.9M/41.5M [03:23<07:42, 60.2kB/s]
36%|###5 | 14.9M/41.5M [03:23<08:11, 56.7kB/s]
36%|###5 | 14.9M/41.5M [03:23<06:59, 66.3kB/s]
36%|###5 | 14.9M/41.5M [03:23<07:37, 60.8kB/s]
36%|###6 | 14.9M/41.5M [03:23<08:10, 56.7kB/s]
36%|###6 | 15.0M/41.5M [03:24<06:54, 67.1kB/s]
36%|###6 | 15.0M/41.5M [03:24<07:35, 61.1kB/s]
36%|###6 | 15.0M/41.5M [03:24<06:35, 70.4kB/s]
36%|###6 | 15.0M/41.5M [03:24<07:18, 63.3kB/s]
36%|###6 | 15.0M/41.5M [03:24<06:25, 72.1kB/s]
36%|###6 | 15.0M/41.5M [03:25<05:54, 78.2kB/s]
36%|###6 | 15.0M/41.5M [03:25<05:36, 82.6kB/s]
36%|###6 | 15.0M/41.5M [03:25<07:01, 65.8kB/s]
36%|###6 | 15.1M
/41.5M [03:25<09:27, 48.9kB/s]
36%|###6 | 15.1M/41.5M [03:26<05:50, 79.0kB/s]
36%|###6 | 15.1M/41.5M [03:26<05:36, 82.3kB/s]
36%|###6 | 15.1M/41.5M [03:26<08:11, 56.2kB/s]
37%|###6 | 15.1M/41.5M [03:26<05:39, 81.3kB/s]
37%|###6 | 15.2M/41.5M [03:27<05:29, 83.8kB/s]
37%|###6 | 15.2M/41.5M [03:27<08:11, 56.1kB/s]
37%|###6 | 15.2M/41.5M [03:27<06:10, 74.3kB/s]
37%|###6 | 15.2M/41.5M [03:28<07:06, 64.5kB/s]
37%|###6 | 15.2M/41.5M [03:28<06:32, 70.2kB/s]
37%|###6 | 15.2M/41.5M [03:28<07:27, 61.5kB/s]
37%|###6 | 15.3M/41.5M [03:28<06:44, 68.0kB/s]
37%|###6 | 15.3M/41.5M [03:29<06:13, 73.6kB/s]
37%|###6 | 15.3M/41.5M [03:29<05:50, 78.3kB/s]
37%|###6 | 15.3M/41.5M [03:29<05:34, 82.1kB/s]
37%|###6 | 15.3M/41.5M [03:29<05:22, 85.0kB/s]
37%|###6 | 15.3M/41.5M [03:30<08:10, 55.9kB/s]
37%|###7 | 15.4M/41.5M [03:30<07:33, 60.4kB/
s]
37%|###7 | 15.4M/41.5M [03:30<06:01, 75.7kB/s]
37%|###7 | 15.4M/41.5M [03:31<11:02, 41.3kB/s]
37%|###7 | 15.4M/41.5M [03:31<08:30, 53.5kB/s]
37%|###7 | 15.4M/41.5M [03:32<10:03, 45.2kB/s]
37%|###7 | 15.5M/41.5M [03:32<10:01, 45.4kB/s]
37%|###7 | 15.5M/41.5M [03:32<09:58, 45.6kB/s]
37%|###7 | 15.5M/41.5M [03:32<09:56, 45.7kB/s]
37%|###7 | 15.5M/41.5M [03:33<09:54, 45.9kB/s]
37%|###7 | 15.5M/41.5M [03:33<09:52, 46.0kB/s]
37%|###7 | 15.5M/41.5M [03:33<09:51, 46.1kB/s]
37%|###7 | 15.5M/41.5M [03:33<12:28, 36.4kB/s]
37%|###7 | 15.5M/41.5M [03:34<11:18, 40.2kB/s]
37%|###7 | 15.5M/41.5M [03:34<10:56, 41.5kB/s]
37%|###7 | 15.5M/41.5M [03:34<10:38, 42.6kB/s]
37%|###7 | 15.5M/41.5M [03:34<08:13, 55.1kB/s]
37%|###7 | 15.6M/41.5M [03:34<10:50, 41.8kB/s]
38%|###7 | 15.6M/41.5M [03:35<08:54, 50.8kB/s]
38%|###7 | 15.6
M/41.5M [03:35<09:05, 49.8kB/s]
38%|###7 | 15.6M/41.5M [03:35<08:43, 51.9kB/s]
38%|###7 | 15.6M/41.5M [03:35<08:58, 50.4kB/s]
38%|###7 | 15.6M/41.5M [03:35<09:11, 49.3kB/s]
38%|###7 | 15.6M/41.5M [03:36<09:20, 48.4kB/s]
38%|###7 | 15.6M/41.5M [03:36<07:22, 61.3kB/s]
38%|###7 | 15.6M/41.5M [03:36<07:56, 56.9kB/s]
38%|###7 | 15.6M/41.5M [03:36<06:41, 67.5kB/s]
38%|###7 | 15.7M/41.5M [03:36<07:22, 61.3kB/s]
38%|###7 | 15.7M/41.5M [03:36<06:23, 70.7kB/s]
38%|###7 | 15.7M/41.5M [03:37<07:06, 63.4kB/s]
38%|###7 | 15.7M/41.5M [03:37<08:06, 55.6kB/s]
38%|###7 | 15.7M/41.5M [03:37<06:15, 72.0kB/s]
38%|###7 | 15.7M/41.5M [03:37<06:28, 69.6kB/s]
38%|###7 | 15.7M/41.5M [03:38<07:33, 59.6kB/s]
38%|###7 | 15.8M/41.5M [03:38<06:22, 70.6kB/s]
38%|###8 | 15.8M/41.5M [03:38<06:32, 68.7kB/s]
38%|###8 | 15.8M/41.5M [03:38<05:59, 75.0kB
/s]
38%|###8 | 15.8M/41.5M [03:39<08:29, 52.9kB/s]
38%|###8 | 15.8M/41.5M [03:39<06:08, 73.0kB/s]
38%|###8 | 15.8M/41.5M [03:39<06:29, 69.0kB/s]
38%|###8 | 15.8M/41.5M [03:39<06:34, 68.2kB/s]
38%|###8 | 15.9M/41.5M [03:39<05:59, 74.8kB/s]
38%|###8 | 15.9M/41.5M [03:40<08:32, 52.4kB/s]
38%|###8 | 15.9M/41.5M [03:40<06:07, 72.9kB/s]
38%|###8 | 15.9M/41.5M [03:40<05:45, 77.7kB/s]
38%|###8 | 15.9M/41.5M [03:40<06:34, 68.0kB/s]
38%|###8 | 15.9M/41.5M [03:41<06:02, 73.9kB/s]
38%|###8 | 15.9M/41.5M [03:41<08:22, 53.3kB/s]
38%|###8 | 16.0M/41.5M [03:41<06:08, 72.6kB/s]
39%|###8 | 16.0M/41.5M [03:42<08:32, 52.2kB/s]
39%|###8 | 16.0M/41.5M [03:42<07:12, 61.8kB/s]
39%|###8 | 16.0M/41.5M [03:42<07:54, 56.3kB/s]
39%|###8 | 16.0M/41.5M [03:42<08:12, 54.3kB/s]
39%|###8 | 16.0M/41.5M [03:42<07:04, 62.9kB/s]
39%|###8 | 16.
0M/41.5M [03:43<09:23, 47.4kB/s]
39%|###8 | 16.1M/41.5M [03:43<07:45, 57.3kB/s]
39%|###8 | 16.1M/41.5M [03:43<08:07, 54.7kB/s]
39%|###8 | 16.1M/41.5M [03:43<08:39, 51.3kB/s]
39%|###8 | 16.1M/41.5M [03:44<07:19, 60.6kB/s]
39%|###8 | 16.1M/41.5M [03:44<07:45, 57.2kB/s]
39%|###8 | 16.1M/41.5M [03:44<08:09, 54.4kB/s]
39%|###8 | 16.1M/41.5M [03:44<06:50, 64.7kB/s]
39%|###8 | 16.1M/41.5M [03:45<15:59, 27.7kB/s]
39%|###9 | 16.2M/41.5M [03:45<06:29, 68.0kB/s]
39%|###9 | 16.2M/41.5M [03:45<06:13, 70.9kB/s]
39%|###9 | 16.2M/41.5M [03:46<08:06, 54.5kB/s]
39%|###9 | 16.2M/41.5M [03:46<06:22, 69.3kB/s]
39%|###9 | 16.3M/41.5M [03:47<07:09, 61.6kB/s]
39%|###9 | 16.3M/41.5M [03:47<07:46, 56.7kB/s]
39%|###9 | 16.3M/41.5M [03:47<07:15, 60.6kB/s]
39%|###9 | 16.3M/41.5M [03:47<07:37, 57.7kB/s]
39%|###9 | 16.3M/41.5M [03:47<07:31, 58.5k
B/s]
39%|###9 | 16.3M/41.5M [03:48<07:56, 55.4kB/s]
39%|###9 | 16.3M/41.5M [03:48<08:50, 49.7kB/s]
39%|###9 | 16.3M/41.5M [03:48<08:25, 52.2kB/s]
39%|###9 | 16.3M/41.5M [03:48<07:22, 59.6kB/s]
39%|###9 | 16.4M/41.5M [03:48<07:18, 60.1kB/s]
39%|###9 | 16.4M/41.5M [03:48<07:50, 56.0kB/s]
39%|###9 | 16.4M/41.5M [03:49<08:15, 53.1kB/s]
39%|###9 | 16.4M/41.5M [03:49<06:45, 65.0kB/s]
40%|###9 | 16.4M/41.5M [03:49<07:23, 59.4kB/s]
40%|###9 | 16.4M/41.5M [03:49<06:44, 65.0kB/s]
40%|###9 | 16.4M/41.5M [03:49<06:52, 63.8kB/s]
40%|###9 | 16.4M/41.5M [03:50<06:01, 72.6kB/s]
40%|###9 | 16.4M/41.5M [03:50<05:56, 73.6kB/s]
40%|###9 | 16.5M/41.5M [03:50<05:31, 79.2kB/s]
40%|###9 | 16.5M/41.5M [03:50<05:51, 74.5kB/s]
40%|###9 | 16.5M/41.5M [03:50<07:07, 61.3kB/s]
40%|###9 | 16.5M/41.5M [03:51<04:59, 87.6kB/s]
40%|###9 | 16
.5M/41.5M [03:51<04:54, 88.9kB/s]
40%|###9 | 16.5M/41.5M [03:51<05:12, 83.8kB/s]
40%|###9 | 16.6M/41.5M [03:51<06:01, 72.4kB/s]
40%|###9 | 16.6M/41.5M [03:51<06:34, 66.2kB/s]
40%|###9 | 16.6M/41.5M [03:52<05:58, 72.9kB/s]
40%|#### | 16.6M/41.5M [03:52<05:33, 78.2kB/s]
40%|#### | 16.6M/41.5M [03:52<05:17, 82.2kB/s]
40%|#### | 16.6M/41.5M [03:52<05:05, 85.2kB/s]
40%|#### | 16.6M/41.5M [03:53<06:24, 67.8kB/s]
40%|#### | 16.7M/41.5M [03:53<05:52, 73.9kB/s]
40%|#### | 16.7M/41.5M [03:53<05:30, 78.8kB/s]
40%|#### | 16.7M/41.5M [03:53<05:15, 82.5kB/s]
40%|#### | 16.7M/41.5M [03:53<06:28, 66.8kB/s]
40%|#### | 16.7M/41.5M [03:54<05:09, 83.8kB/s]
40%|#### | 16.8M/41.5M [03:54<05:04, 85.1kB/s]
40%|#### | 16.8M/41.5M [03:54<05:13, 82.6kB/s]
40%|#### | 16.8M/41.5M [03:54<05:03, 85.3kB/s]
40%|#### | 16.8M/41.5M [03:54<04:39, 92.6
kB/s]
41%|#### | 16.8M/41.5M [03:55<04:56, 87.3kB/s]
41%|#### | 16.8M/41.5M [03:55<04:51, 88.9kB/s]
41%|#### | 16.8M/41.5M [03:55<04:47, 90.0kB/s]
41%|#### | 16.9M/41.5M [03:55<04:44, 90.8kB/s]
41%|#### | 16.9M/41.5M [03:55<05:44, 74.9kB/s]
41%|#### | 16.9M/41.5M [03:56<04:45, 90.3kB/s]
41%|#### | 16.9M/41.5M [03:56<04:43, 91.0kB/s]
41%|#### | 16.9M/41.5M [03:56<05:17, 81.1kB/s]
41%|#### | 16.9M/41.5M [03:56<07:24, 57.9kB/s]
41%|#### | 17.0M/41.5M [03:57<05:31, 77.6kB/s]
41%|#### | 17.0M/41.5M [03:57<06:30, 65.8kB/s]
41%|#### | 17.0M/41.5M [03:57<05:58, 71.5kB/s]
41%|####1 | 17.0M/41.5M [03:57<05:35, 76.5kB/s]
41%|####1 | 17.0M/41.5M [03:58<05:18, 80.5kB/s]
41%|####1 | 17.0M/41.5M [03:58<05:06, 83.7kB/s]
41%|####1 | 17.1M/41.5M [03:58<04:57, 86.2kB/s]
41%|####1 | 17.1M/41.5M [03:58<06:12, 68.8kB/s]
41%|####1 | 1
7.1M/41.5M [03:58<05:43, 74.5kB/s]
41%|####1 | 17.1M/41.5M [03:59<05:23, 79.1kB/s]
41%|####1 | 17.1M/41.5M [03:59<05:08, 82.7kB/s]
41%|####1 | 17.1M/41.5M [03:59<04:58, 85.5kB/s]
41%|####1 | 17.2M/41.5M [03:59<04:13, 101kB/s]
41%|####1 | 17.2M/41.5M [03:59<04:37, 92.0kB/s]
41%|####1 | 17.2M/41.5M [03:59<04:18, 98.6kB/s]
41%|####1 | 17.2M/41.5M [04:00<06:50, 62.0kB/s]
42%|####1 | 17.2M/41.5M [04:00<05:31, 76.7kB/s]
42%|####1 | 17.2M/41.5M [04:00<05:16, 80.3kB/s]
42%|####1 | 17.3M/41.5M [04:01<06:19, 66.9kB/s]
42%|####1 | 17.3M/41.5M [04:01<05:49, 72.6kB/s]
42%|####1 | 17.3M/41.5M [04:01<05:27, 77.4kB/s]
42%|####1 | 17.3M/41.5M [04:01<06:31, 64.8kB/s]
42%|####1 | 17.3M/41.5M [04:02<07:17, 57.9kB/s]
42%|####1 | 17.3M/41.5M [04:02<07:59, 52.8kB/s]
42%|####1 | 17.4M/41.5M [04:02<07:58, 52.9kB/s]
42%|####1 | 17.4M/41.5M [04:02<08:11, 51.
5kB/s]
42%|####1 | 17.4M/41.5M [04:03<10:19, 40.8kB/s]
42%|####1 | 17.4M/41.5M [04:03<14:17, 29.5kB/s]
42%|####1 | 17.4M/41.5M [04:04<14:44, 28.6kB/s]
42%|####1 | 17.4M/41.5M [04:04<12:01, 35.0kB/s]
42%|####1 | 17.4M/41.5M [04:04<11:24, 36.9kB/s]
42%|####1 | 17.4M/41.5M [04:05<10:52, 38.7kB/s]
42%|####2 | 17.4M/41.5M [04:05<10:25, 40.3kB/s]
42%|####2 | 17.4M/41.5M [04:05<10:04, 41.7kB/s]
42%|####2 | 17.4M/41.5M [04:05<12:14, 34.3kB/s]
42%|####2 | 17.5M/41.5M [04:05<11:21, 37.0kB/s]
42%|####2 | 17.5M/41.5M [04:06<10:41, 39.3kB/s]
42%|####2 | 17.5M/41.5M [04:06<10:13, 41.1kB/s]
42%|####2 | 17.5M/41.5M [04:06<09:52, 42.5kB/s]
42%|####2 | 17.5M/41.5M [04:06<09:37, 43.6kB/s]
42%|####2 | 17.5M/41.5M [04:07<12:07, 34.6kB/s]
42%|####2 | 17.5M/41.5M [04:07<11:12, 37.4kB/s]
42%|####2 | 17.5M/41.5M [04:07<13:15, 31.6kB/s]
42%|####2 |
17.5M/41.5M [04:07<09:13, 45.4kB/s]
42%|####2 | 17.5M/41.5M [04:08<09:08, 45.8kB/s]
42%|####2 | 17.6M/41.5M [04:08<07:27, 56.0kB/s]
42%|####2 | 17.6M/41.5M [04:08<07:46, 53.7kB/s]
42%|####2 | 17.6M/41.5M [04:08<08:03, 51.9kB/s]
42%|####2 | 17.6M/41.5M [04:08<08:17, 50.4kB/s]
42%|####2 | 17.6M/41.5M [04:08<08:28, 49.3kB/s]
42%|####2 | 17.6M/41.5M [04:09<06:46, 61.6kB/s]
42%|####2 | 17.6M/41.5M [04:09<07:17, 57.2kB/s]
42%|####2 | 17.6M/41.5M [04:09<09:57, 41.9kB/s]
42%|####2 | 17.6M/41.5M [04:09<09:42, 43.0kB/s]
42%|####2 | 17.6M/41.5M [04:10<09:30, 43.8kB/s]
43%|####2 | 17.6M/41.5M [04:10<09:21, 44.5kB/s]
43%|####2 | 17.6M/41.5M [04:10<11:49, 35.2kB/s]
43%|####2 | 17.7M/41.5M [04:10<08:31, 48.9kB/s]
43%|####2 | 17.7M/41.5M [04:11<10:48, 38.5kB/s]
43%|####2 | 17.7M/41.5M [04:11<08:13, 50.6kB/s]
43%|####2 | 17.7M/41.5M [04:11<10:25, 39
.9kB/s]
43%|####2 | 17.7M/41.5M [04:11<09:50, 42.2kB/s]
43%|####2 | 17.7M/41.5M [04:12<09:38, 43.1kB/s]
43%|####2 | 17.7M/41.5M [04:12<11:36, 35.8kB/s]
43%|####2 | 17.7M/41.5M [04:12<10:56, 38.0kB/s]
43%|####2 | 17.7M/41.5M [04:12<10:24, 39.9kB/s]
43%|####2 | 17.8M/41.5M [04:13<09:59, 41.5kB/s]
43%|####2 | 17.8M/41.5M [04:13<09:41, 42.8kB/s]
43%|####2 | 17.8M/41.5M [04:13<09:28, 43.7kB/s]
43%|####2 | 17.8M/41.5M [04:13<09:19, 44.5kB/s]
43%|####2 | 17.8M/41.5M [04:13<09:12, 45.0kB/s]
43%|####2 | 17.8M/41.5M [04:14<11:45, 35.2kB/s]
43%|####2 | 17.8M/41.5M [04:14<10:54, 37.9kB/s]
43%|####2 | 17.8M/41.5M [04:14<08:27, 49.0kB/s]
43%|####2 | 17.8M/41.5M [04:14<08:33, 48.3kB/s]
43%|####2 | 17.8M/41.5M [04:15<10:58, 37.7kB/s]
43%|####2 | 17.8M/41.5M [04:15<09:49, 42.1kB/s]
43%|####3 | 17.8M/41.5M [04:15<09:33, 43.2kB/s]
43%|####3 |
17.9M/41.5M [04:15<10:49, 38.2kB/s]
43%|####3 | 17.9M/41.5M [04:15<10:15, 40.2kB/s]
43%|####3 | 17.9M/41.5M [04:15<09:51, 41.8kB/s]
43%|####3 | 17.9M/41.5M [04:16<09:34, 43.1kB/s]
43%|####3 | 17.9M/41.5M [04:16<09:22, 44.0kB/s]
43%|####3 | 17.9M/41.5M [04:16<09:13, 44.7kB/s]
43%|####3 | 17.9M/41.5M [04:16<07:54, 52.2kB/s]
43%|####3 | 17.9M/41.5M [04:16<07:12, 57.2kB/s]
43%|####3 | 17.9M/41.5M [04:17<07:38, 54.0kB/s]
43%|####3 | 17.9M/41.5M [04:17<06:16, 65.7kB/s]
43%|####3 | 17.9M/41.5M [04:17<06:52, 59.9kB/s]
43%|####3 | 18.0M/41.5M [04:17<05:53, 69.8kB/s]
43%|####3 | 18.0M/41.5M [04:17<06:58, 58.9kB/s]
43%|####3 | 18.0M/41.5M [04:18<05:12, 78.9kB/s]
43%|####3 | 18.0M/41.5M [04:18<04:58, 82.5kB/s]
43%|####3 | 18.0M/41.5M [04:18<04:48, 85.3kB/s]
43%|####3 | 18.0M/41.5M [04:18<06:00, 68.3kB/s]
44%|####3 | 18.1M/41.5M [04:18<05:31, 7
4.0kB/s]
44%|####3 | 18.1M/41.5M [04:19<05:11, 78.7kB/s]
44%|####3 | 18.1M/41.5M [04:19<04:57, 82.4kB/s]
44%|####3 | 18.1M/41.5M [04:19<04:47, 85.3kB/s]
44%|####3 | 18.1M/41.5M [04:19<04:40, 87.4kB/s]
44%|####3 | 18.1M/41.5M [04:20<05:54, 69.1kB/s]
44%|####3 | 18.2M/41.5M [04:20<05:00, 81.4kB/s]
44%|####3 | 18.2M/41.5M [04:20<05:05, 80.1kB/s]
44%|####3 | 18.2M/41.5M [04:20<04:53, 83.2kB/s]
44%|####3 | 18.2M/41.5M [04:20<04:44, 85.8kB/s]
44%|####3 | 18.2M/41.5M [04:20<04:38, 87.7kB/s]
44%|####3 | 18.2M/41.5M [04:21<04:33, 89.1kB/s]
44%|####4 | 18.3M/41.5M [04:21<04:30, 90.2kB/s]
44%|####4 | 18.3M/41.5M [04:21<04:27, 90.9kB/s]
44%|####4 | 18.3M/41.5M [04:21<04:25, 91.5kB/s]
44%|####4 | 18.3M/41.5M [04:21<04:24, 91.8kB/s]
44%|####4 | 18.3M/41.5M [04:22<04:23, 92.1kB/s]
44%|####4 | 18.3M/41.5M [04:22<04:06, 98.5kB/s]
44%|####4
| 18.4M/41.5M [04:22<04:10, 96.7kB/s]
44%|####4 | 18.4M/41.5M [04:22<04:13, 95.5kB/s]
44%|####4 | 18.4M/41.5M [04:22<03:42, 109kB/s]
44%|####4 | 18.4M/41.5M [04:22<03:38, 111kB/s]
44%|####4 | 18.4M/41.5M [04:23<03:22, 119kB/s]
44%|####4 | 18.5M/41.5M [04:23<03:23, 119kB/s]
45%|####4 | 18.5M/41.5M [04:23<03:13, 125kB/s]
45%|####4 | 18.5M/41.5M [04:23<03:18, 122kB/s]
45%|####4 | 18.5M/41.5M [04:23<02:58, 135kB/s]
45%|####4 | 18.5M/41.5M [04:23<02:56, 136kB/s]
45%|####4 | 18.6M/41.5M [04:24<02:55, 137kB/s]
45%|####4 | 18.6M/41.5M [04:24<02:48, 143kB/s]
45%|####4 | 18.6M/41.5M [04:24<02:38, 151kB/s]
45%|####4 | 18.6M/41.5M [04:24<03:32, 113kB/s]
45%|####5 | 18.7M/41.5M [04:25<03:06, 128kB/s]
45%|####5 | 18.7M/41.5M [04:25<02:52, 139kB/s]
45%|####5 | 18.7M/41.5M [04:25<03:09, 126kB/s]
45%|####5 | 18.8M/41.5M [04:25<03:03, 130kB/s]
45%|####5 | 18.8M/41.5M [04:25<03:19, 119kB/s]
45%|####5 | 18.8M/41.5M [04:26<04:35, 86.5kB/s]
45%|####5 | 18.8M/41.5M [04:26<05:35, 70.9kB/s]
45%|####5 | 18.8M/41.5M [04:26<04:50, 81.7kB/s]
45%|####5 | 18.8M/41.5M [04:27<07:56, 49.9kB/s]
45%|####5 | 18.9M/41.5M [04:27<09:22, 42.2kB/s]
45%|####5 | 18.9M/41.5M [04:28<07:00, 56.4kB/s]
46%|####5 | 18.9M/41.5M [04:28<07:23, 53.4kB/s]
46%|####5 | 18.9M/41.5M [04:28<06:32, 60.3kB/s]
46%|####5 | 18.9M/41.5M [04:28<06:51, 57.5kB/s]
46%|####5 | 18.9M/41.5M [04:28<07:09, 55.0kB/s]
46%|####5 | 18.9M/41.5M [04:29<06:08, 64.1kB/s]
46%|####5 | 19.0M/41.5M [04:29<08:19, 47.3kB/s]
46%|####5 | 19.0M/41.5M [04:29<07:01, 56.1kB/s]
46%|####5 | 19.0M/41.5M [04:30<07:17, 54.0kB/s]
46%|####5 | 19.0M/41.5M [04:30<06:13, 63.1kB/s]
46%|####5 | 19.0M/41.5M [04:30<08:41, 45.2kB/s]
46%|####5 | 19.0M/41.5M [
04:30<08:38, 45.4kB/s]
46%|####5 | 19.0M/41.5M [04:31<10:38, 36.9kB/s]
46%|####5 | 19.0M/41.5M [04:31<12:14, 32.1kB/s]
46%|####5 | 19.0M/41.5M [04:31<11:13, 35.0kB/s]
46%|####5 | 19.0M/41.5M [04:32<15:46, 24.9kB/s]
46%|####5 | 19.0M/41.5M [04:32<18:29, 21.2kB/s]
46%|####5 | 19.1M/41.5M [04:33<15:52, 24.7kB/s]
46%|####5 | 19.1M/41.5M [04:33<12:32, 31.2kB/s]
46%|####6 | 19.1M/41.5M [04:34<14:13, 27.5kB/s]
46%|####6 | 19.1M/41.5M [04:34<14:02, 27.9kB/s]
46%|####6 | 19.1M/41.5M [04:34<14:45, 26.5kB/s]
46%|####6 | 19.1M/41.5M [04:34<13:58, 28.0kB/s]
46%|####6 | 19.1M/41.5M [04:35<14:23, 27.2kB/s]
46%|####6 | 19.1M/41.5M [04:35<15:28, 25.3kB/s]
46%|####6 | 19.1M/41.5M [04:35<13:03, 29.9kB/s]
46%|####6 | 19.1M/41.5M [04:35<12:05, 32.3kB/s]
46%|####6 | 19.1M/41.5M [04:36<13:28, 29.0kB/s]
46%|####6 | 19.2M/41.5M [04:36<11:58, 32.6kB/s]
46%|####6 | 19.2M/41.5M [04:36<13:24, 29.1kB/s]
46%|####6 | 19.2M/41.5M [04:37<11:55, 32.7kB/s]
46%|####6 | 19.2M/41.5M [04:37<13:22, 29.1kB/s]
46%|####6 | 19.2M/41.5M [04:37<10:16, 37.9kB/s]
46%|####6 | 19.2M/41.5M [04:37<10:40, 36.5kB/s]
46%|####6 | 19.2M/41.5M [04:38<10:04, 38.6kB/s]
46%|####6 | 19.2M/41.5M [04:38<09:37, 40.5kB/s]
46%|####6 | 19.2M/41.5M [04:38<09:16, 42.0kB/s]
46%|####6 | 19.2M/41.5M [04:38<11:24, 34.1kB/s]
46%|####6 | 19.2M/41.5M [04:39<10:02, 38.7kB/s]
46%|####6 | 19.3M/41.5M [04:39<09:38, 40.3kB/s]
46%|####6 | 19.3M/41.5M [04:39<09:18, 41.7kB/s]
46%|####6 | 19.3M/41.5M [04:39<11:18, 34.3kB/s]
46%|####6 | 19.3M/41.5M [04:40<10:29, 37.0kB/s]
46%|####6 | 19.3M/41.5M [04:40<09:52, 39.3kB/s]
47%|####6 | 19.3M/41.5M [04:40<09:26, 41.1kB/s]
47%|####6 | 19.3M/41.5M [04:40<09:07, 42.5kB/s]
47%|####6 | 19.3M/41.5M
[04:40<08:46, 44.2kB/s]
47%|####6 | 19.3M/41.5M [04:41<08:59, 43.1kB/s]
47%|####6 | 19.4M/41.5M [04:41<08:24, 46.0kB/s]
47%|####6 | 19.4M/41.5M [04:41<08:46, 44.1kB/s]
47%|####6 | 19.4M/41.5M [04:42<08:40, 44.6kB/s]
47%|####6 | 19.4M/41.5M [04:42<08:35, 45.0kB/s]
47%|####6 | 19.4M/41.5M [04:42<08:31, 45.3kB/s]
47%|####6 | 19.4M/41.5M [04:42<08:27, 45.6kB/s]
47%|####6 | 19.4M/41.5M [04:42<08:25, 45.8kB/s]
47%|####6 | 19.4M/41.5M [04:42<08:23, 46.0kB/s]
47%|####6 | 19.4M/41.5M [04:43<07:49, 49.3kB/s]
47%|####6 | 19.4M/41.5M [04:43<07:58, 48.4kB/s]
47%|####6 | 19.4M/41.5M [04:43<10:30, 36.7kB/s]
47%|####6 | 19.5M/41.5M [04:43<06:32, 58.8kB/s]
47%|####6 | 19.5M/41.5M [04:43<06:54, 55.7kB/s]
47%|####6 | 19.5M/41.5M [04:44<05:52, 65.5kB/s]
47%|####6 | 19.5M/41.5M [04:44<05:16, 73.0kB/s]
47%|####6 | 19.5M/41.5M [04:44<05:52, 65.5kB/s]
47%|####7 | 19.5M/41.5M [04:44<05:14, 73.3kB/s]
47%|####7 | 19.5M/41.5M [04:44<04:51, 79.0kB/s]
47%|####7 | 19.5M/41.5M [04:45<04:37, 83.0kB/s]
47%|####7 | 19.6M/41.5M [04:45<04:27, 85.9kB/s]
47%|####7 | 19.6M/41.5M [04:45<04:21, 87.9kB/s]
47%|####7 | 19.6M/41.5M [04:45<04:16, 89.4kB/s]
47%|####7 | 19.6M/41.5M [04:45<05:29, 69.6kB/s]
47%|####7 | 19.6M/41.5M [04:46<05:04, 75.3kB/s]
47%|####7 | 19.6M/41.5M [04:46<06:01, 63.3kB/s]
47%|####7 | 19.7M/41.5M [04:46<04:59, 76.3kB/s]
47%|####7 | 19.7M/41.5M [04:46<05:35, 68.1kB/s]
47%|####7 | 19.7M/41.5M [04:47<05:27, 69.8kB/s]
47%|####7 | 19.7M/41.5M [04:47<05:34, 68.2kB/s]
48%|####7 | 19.7M/41.5M [04:47<05:25, 70.2kB/s]
48%|####7 | 19.7M/41.5M [04:47<05:33, 68.3kB/s]
48%|####7 | 19.7M/41.5M [04:47<05:03, 75.2kB/s]
48%|####7 | 19.8M/41.5M [04:48<05:03, 75.0kB/s]
48%|####7 | 19.8M/41.5M
[04:48<06:04, 62.6kB/s]
48%|####7 | 19.8M/41.5M [04:48<05:48, 65.2kB/s]
48%|####7 | 19.8M/41.5M [04:48<05:03, 74.9kB/s]
48%|####7 | 19.8M/41.5M [04:49<05:56, 63.7kB/s]
48%|####7 | 19.8M/41.5M [04:49<06:18, 59.9kB/s]
48%|####7 | 19.8M/41.5M [04:49<06:40, 56.7kB/s]
48%|####7 | 19.9M/41.5M [04:49<08:46, 43.1kB/s]
48%|####7 | 19.9M/41.5M [04:50<06:23, 59.2kB/s]
48%|####7 | 19.9M/41.5M [04:50<06:45, 55.8kB/s]
48%|####7 | 19.9M/41.5M [04:50<06:18, 59.8kB/s]
48%|####7 | 19.9M/41.5M [04:50<06:41, 56.3kB/s]
48%|####7 | 19.9M/41.5M [04:51<08:56, 42.2kB/s]
48%|####8 | 19.9M/41.5M [04:51<09:37, 39.1kB/s]
48%|####8 | 19.9M/41.5M [04:52<10:33, 35.7kB/s]
48%|####8 | 20.0M/41.5M [04:52<11:06, 33.9kB/s]
48%|####8 | 20.0M/41.5M [04:53<13:41, 27.5kB/s]
48%|####8 | 20.0M/41.5M [04:53<16:01, 23.5kB/s]
48%|####8 | 20.0M/41.5M [04:53<16:03, 23.4kB/s]
48%|####8 | 20.0M/41.5M [04:54<16:05, 23.4kB/s]
48%|####8 | 20.0M/41.5M [04:54<13:57, 26.9kB/s]
48%|####8 | 20.0M/41.5M [04:54<12:20, 30.4kB/s]
48%|####8 | 20.0M/41.5M [04:55<13:25, 28.0kB/s]
48%|####8 | 20.0M/41.5M [04:55<11:53, 31.6kB/s]
48%|####8 | 20.0M/41.5M [04:55<10:46, 34.8kB/s]
48%|####8 | 20.0M/41.5M [04:55<07:42, 48.6kB/s]
48%|####8 | 20.0M/41.5M [04:55<07:48, 48.0kB/s]
48%|####8 | 20.1M/41.5M [04:55<07:52, 47.6kB/s]
48%|####8 | 20.1M/41.5M [04:56<06:14, 60.0kB/s]
48%|####8 | 20.1M/41.5M [04:56<06:39, 56.2kB/s]
48%|####8 | 20.1M/41.5M [04:56<07:00, 53.4kB/s]
48%|####8 | 20.1M/41.5M [04:56<07:26, 50.2kB/s]
49%|####8 | 20.1M/41.5M [04:57<06:27, 57.8kB/s]
49%|####8 | 20.1M/41.5M [04:57<04:56, 75.6kB/s]
49%|####8 | 20.2M/41.5M [04:57<04:40, 79.7kB/s]
49%|####8 | 20.2M/41.5M [04:57<05:38, 66.1kB/s]
49%|####8 | 20.2M/41.5
M [04:58<06:02, 61.7kB/s]
49%|####8 | 20.2M/41.5M [04:58<09:31, 39.0kB/s]
49%|####8 | 20.2M/41.5M [04:59<10:24, 35.7kB/s]
49%|####8 | 20.2M/41.5M [04:59<13:10, 28.2kB/s]
49%|####8 | 20.2M/41.5M [04:59<12:00, 30.9kB/s]
49%|####8 | 20.2M/41.5M [05:00<12:58, 28.6kB/s]
49%|####8 | 20.2M/41.5M [05:00<11:41, 31.7kB/s]
49%|####8 | 20.2M/41.5M [05:00<12:51, 28.9kB/s]
49%|####8 | 20.3M/41.5M [05:00<11:30, 32.3kB/s]
49%|####8 | 20.3M/41.5M [05:01<10:30, 35.3kB/s]
49%|####8 | 20.3M/41.5M [05:01<09:46, 37.9kB/s]
49%|####8 | 20.3M/41.5M [05:01<09:15, 40.1kB/s]
49%|####8 | 20.3M/41.5M [05:01<08:52, 41.7kB/s]
49%|####8 | 20.3M/41.5M [05:01<08:36, 43.0kB/s]
49%|####8 | 20.3M/41.5M [05:02<09:16, 39.9kB/s]
49%|####8 | 20.3M/41.5M [05:02<08:02, 46.0kB/s]
49%|####8 | 20.3M/41.5M [05:02<08:01, 46.1kB/s]
49%|####8 | 20.3M/41.5M [05:02<08:00, 46.2kB/s]
49%|####9 | 20.3M/41.5M [05:02<07:59, 46.2kB/s]
49%|####9 | 20.4M/41.5M [05:02<06:08, 60.1kB/s]
49%|####9 | 20.4M/41.5M [05:03<06:35, 56.0kB/s]
49%|####9 | 20.4M/41.5M [05:03<05:30, 67.0kB/s]
49%|####9 | 20.4M/41.5M [05:03<06:03, 60.8kB/s]
49%|####9 | 20.4M/41.5M [05:03<06:31, 56.5kB/s]
49%|####9 | 20.4M/41.5M [05:03<05:28, 67.3kB/s]
49%|####9 | 20.4M/41.5M [05:03<06:01, 61.1kB/s]
49%|####9 | 20.4M/41.5M [05:04<06:30, 56.6kB/s]
49%|####9 | 20.4M/41.5M [05:04<05:27, 67.5kB/s]
49%|####9 | 20.4M/41.5M [05:04<06:00, 61.2kB/s]
49%|####9 | 20.5M/41.5M [05:04<05:12, 70.6kB/s]
49%|####9 | 20.5M/41.5M [05:04<04:45, 77.3kB/s]
49%|####9 | 20.5M/41.5M [05:04<04:28, 81.9kB/s]
49%|####9 | 20.5M/41.5M [05:05<05:08, 71.3kB/s]
49%|####9 | 20.5M/41.5M [05:05<04:42, 77.7kB/s]
49%|####9 | 20.5M/41.5M [05:05<04:27, 82.2kB/s]
50%|####9 | 20.5M/41.
5M [05:05<04:17, 85.3kB/s]
50%|####9 | 20.6M/41.5M [05:05<04:10, 87.6kB/s]
50%|####9 | 20.6M/41.5M [05:06<04:06, 89.1kB/s]
50%|####9 | 20.6M/41.5M [05:06<04:02, 90.2kB/s]
50%|####9 | 20.6M/41.5M [05:06<04:00, 91.0kB/s]
50%|####9 | 20.6M/41.5M [05:06<03:58, 91.6kB/s]
50%|####9 | 20.6M/41.5M [05:06<03:57, 92.0kB/s]
50%|####9 | 20.7M/41.5M [05:06<03:25, 106kB/s]
50%|####9 | 20.7M/41.5M [05:07<03:33, 102kB/s]
50%|####9 | 20.7M/41.5M [05:07<03:12, 113kB/s]
50%|####9 | 20.7M/41.5M [05:07<03:00, 121kB/s]
50%|##### | 20.8M/41.5M [05:07<02:52, 126kB/s]
50%|##### | 20.8M/41.5M [05:07<04:02, 89.5kB/s]
50%|##### | 20.8M/41.5M [05:08<02:53, 125kB/s]
50%|##### | 20.8M/41.5M [05:08<02:48, 128kB/s]
50%|##### | 20.8M/41.5M [05:08<03:14, 111kB/s]
50%|##### | 20.9M/41.5M [05:08<03:09, 114kB/s]
50%|##### | 20.9M/41.5M [05:08<03:35, 100kB/s]
50%|
##### | 20.9M/41.5M [05:09<03:39, 98.3kB/s]
50%|##### | 20.9M/41.5M [05:09<04:30, 79.7kB/s]
50%|##### | 20.9M/41.5M [05:09<04:34, 78.5kB/s]
50%|##### | 20.9M/41.5M [05:09<04:23, 81.7kB/s]
51%|##### | 21.0M/41.5M [05:09<04:14, 84.6kB/s]
51%|##### | 21.0M/41.5M [05:10<04:07, 86.9kB/s]
51%|##### | 21.0M/41.5M [05:10<04:02, 88.5kB/s]
51%|##### | 21.0M/41.5M [05:10<05:24, 66.1kB/s]
51%|##### | 21.0M/41.5M [05:10<04:18, 83.0kB/s]
51%|##### | 21.0M/41.5M [05:11<04:42, 75.8kB/s]
51%|##### | 21.1M/41.5M [05:11<04:43, 75.5kB/s]
51%|##### | 21.1M/41.5M [05:11<05:13, 68.4kB/s]
51%|##### | 21.1M/41.5M [05:11<04:46, 74.7kB/s]
51%|##### | 21.1M/41.5M [05:11<05:19, 67.1kB/s]
51%|##### | 21.1M/41.5M [05:12<05:08, 69.3kB/s]
51%|##### | 21.1M/41.5M [05:12<04:42, 75.7kB/s]
51%|##### | 21.1M/41.5M [05:12<04:54, 72.6kB/s]
51%|##### | 21.1M/41.5M [05:
12<04:51, 73.1kB/s]
51%|##### | 21.1M/41.5M [05:12<05:02, 70.6kB/s]
51%|#####1 | 21.2M/41.5M [05:12<04:10, 84.9kB/s]
51%|#####1 | 21.2M/41.5M [05:13<04:04, 87.2kB/s]
51%|#####1 | 21.2M/41.5M [05:13<03:57, 89.4kB/s]
51%|#####1 | 21.2M/41.5M [05:13<03:55, 90.4kB/s]
51%|#####1 | 21.2M/41.5M [05:13<03:37, 97.8kB/s]
51%|#####1 | 21.3M/41.5M [05:14<04:25, 79.9kB/s]
51%|#####1 | 21.3M/41.5M [05:14<03:21, 105kB/s]
51%|#####1 | 21.3M/41.5M [05:14<03:27, 102kB/s]
51%|#####1 | 21.3M/41.5M [05:14<03:31, 100kB/s]
51%|#####1 | 21.3M/41.5M [05:14<03:35, 98.1kB/s]
51%|#####1 | 21.4M/41.5M [05:14<03:24, 103kB/s]
52%|#####1 | 21.4M/41.5M [05:15<03:30, 100kB/s]
52%|#####1 | 21.4M/41.5M [05:15<03:08, 112kB/s]
52%|#####1 | 21.4M/41.5M [05:15<03:18, 106kB/s]
52%|#####1 | 21.4M/41.5M [05:15<03:01, 116kB/s]
52%|#####1 | 21.4M/41.5M [05:15<03:12, 109kB/s]
52%|#####1
| 21.5M/41.5M [05:15<03:21, 104kB/s]
52%|#####1 | 21.5M/41.5M [05:16<03:02, 115kB/s]
52%|#####1 | 21.5M/41.5M [05:16<02:51, 122kB/s]
52%|#####1 | 21.5M/41.5M [05:16<03:31, 98.8kB/s]
52%|#####1 | 21.5M/41.5M [05:16<02:57, 118kB/s]
52%|#####1 | 21.6M/41.5M [05:17<04:05, 85.3kB/s]
52%|#####2 | 21.6M/41.5M [05:17<03:46, 92.3kB/s]
52%|#####2 | 21.6M/41.5M [05:17<05:00, 69.3kB/s]
52%|#####2 | 21.6M/41.5M [05:17<04:39, 74.6kB/s]
52%|#####2 | 21.6M/41.5M [05:17<04:42, 73.8kB/s]
52%|#####2 | 21.6M/41.5M [05:18<05:30, 63.0kB/s]
52%|#####2 | 21.7M/41.5M [05:18<04:58, 69.6kB/s]
52%|#####2 | 21.7M/41.5M [05:18<05:24, 64.0kB/s]
52%|#####2 | 21.7M/41.5M [05:19<07:18, 47.4kB/s]
52%|#####2 | 21.7M/41.5M [05:19<05:05, 68.0kB/s]
52%|#####2 | 21.7M/41.5M [05:19<06:51, 50.5kB/s]
52%|#####2 | 21.7M/41.5M [05:19<06:58, 49.6kB/s]
52%|#####2 | 21.7M/41.5M [05:20<08:45,
39.5kB/s]
52%|#####2 | 21.7M/41.5M [05:20<06:46, 50.9kB/s]
52%|#####2 | 21.7M/41.5M [05:20<06:55, 49.9kB/s]
52%|#####2 | 21.8M/41.5M [05:20<07:02, 49.0kB/s]
52%|#####2 | 21.8M/41.5M [05:20<05:41, 60.5kB/s]
52%|#####2 | 21.8M/41.5M [05:20<06:04, 56.7kB/s]
53%|#####2 | 21.8M/41.5M [05:21<05:29, 62.7kB/s]
53%|#####2 | 21.8M/41.5M [05:21<05:31, 62.2kB/s]
53%|#####2 | 21.8M/41.5M [05:21<05:58, 57.6kB/s]
53%|#####2 | 21.8M/41.5M [05:21<05:24, 63.6kB/s]
53%|#####2 | 21.8M/41.5M [05:21<05:27, 62.9kB/s]
53%|#####2 | 21.8M/41.5M [05:22<04:46, 71.9kB/s]
53%|#####2 | 21.9M/41.5M [05:22<05:20, 64.2kB/s]
53%|#####2 | 21.9M/41.5M [05:22<04:42, 72.8kB/s]
53%|#####2 | 21.9M/41.5M [05:22<04:20, 78.8kB/s]
53%|#####2 | 21.9M/41.5M [05:22<04:07, 83.0kB/s]
53%|#####2 | 21.9M/41.5M [05:22<03:58, 85.9kB/s]
53%|#####2 | 21.9M/41.5M [05:23<03:53, 88.0kB/s]
53%|#####2
| 22.0M/41.5M [05:23<03:18, 103kB/s]
53%|#####2 | 22.0M/41.5M [05:23<03:24, 100kB/s]
53%|#####3 | 22.0M/41.5M [05:23<03:02, 112kB/s]
53%|#####3 | 22.0M/41.5M [05:23<03:12, 106kB/s]
53%|#####3 | 22.0M/41.5M [05:24<04:19, 78.6kB/s]
53%|#####3 | 22.1M/41.5M [05:24<02:54, 117kB/s]
53%|#####3 | 22.1M/41.5M [05:24<03:04, 110kB/s]
53%|#####3 | 22.1M/41.5M [05:24<03:12, 106kB/s]
53%|#####3 | 22.1M/41.5M [05:24<02:56, 115kB/s]
53%|#####3 | 22.1M/41.5M [05:25<03:06, 109kB/s]
53%|#####3 | 22.2M/41.5M [05:25<03:42, 91.1kB/s]
53%|#####3 | 22.2M/41.5M [05:25<02:57, 114kB/s]
54%|#####3 | 22.2M/41.5M [05:25<03:06, 109kB/s]
54%|#####3 | 22.2M/41.5M [05:25<03:04, 110kB/s]
54%|#####3 | 22.2M/41.5M [05:26<03:49, 88.1kB/s]
54%|#####3 | 22.3M/41.5M [05:26<02:53, 116kB/s]
54%|#####3 | 22.3M/41.5M [05:26<03:03, 110kB/s]
54%|#####3 | 22.3M/41.5M [05:26<02:53, 116kB/s]
54%|#####3 | 22.3M/41.5M [05:26<03:08, 106kB/s]
54%|#####3 | 22.3M/41.5M [05:27<03:05, 108kB/s]
54%|#####3 | 22.4M/41.5M [05:27<02:51, 117kB/s]
54%|#####3 | 22.4M/41.5M [05:27<02:44, 122kB/s]
54%|#####4 | 22.4M/41.5M [05:27<02:53, 115kB/s]
54%|#####4 | 22.4M/41.5M [05:27<02:43, 122kB/s]
54%|#####4 | 22.5M/41.5M [05:28<02:37, 127kB/s]
54%|#####4 | 22.5M/41.5M [05:28<02:50, 117kB/s]
54%|#####4 | 22.5M/41.5M [05:28<02:41, 124kB/s]
54%|#####4 | 22.5M/41.5M [05:28<03:41, 89.8kB/s]
54%|#####4 | 22.5M/41.5M [05:28<02:48, 118kB/s]
54%|#####4 | 22.6M/41.5M [05:29<02:58, 111kB/s]
54%|#####4 | 22.6M/41.5M [05:29<03:06, 106kB/s]
54%|#####4 | 22.6M/41.5M [05:29<03:12, 103kB/s]
54%|#####4 | 22.6M/41.5M [05:29<03:18, 99.9kB/s]
55%|#####4 | 22.6M/41.5M [05:29<04:20, 76.0kB/s]
55%|#####4 | 22.6M/41.5M [05:30<04:06, 80.1kB/s]
55%|#####4 | 22.7M/41.5M [05:30
<04:14, 77.5kB/s]
55%|#####4 | 22.7M/41.5M [05:30<04:45, 69.0kB/s]
55%|#####4 | 22.7M/41.5M [05:30<04:24, 74.6kB/s]
55%|#####4 | 22.7M/41.5M [05:31<05:11, 63.3kB/s]
55%|#####4 | 22.7M/41.5M [05:31<04:34, 71.7kB/s]
55%|#####4 | 22.7M/41.5M [05:31<04:44, 69.2kB/s]
55%|#####4 | 22.8M/41.5M [05:31<04:23, 74.4kB/s]
55%|#####4 | 22.8M/41.5M [05:32<04:08, 78.9kB/s]
55%|#####4 | 22.8M/41.5M [05:32<03:57, 82.4kB/s]
55%|#####4 | 22.8M/41.5M [05:32<03:50, 85.1kB/s]
55%|#####5 | 22.8M/41.5M [05:32<04:17, 76.1kB/s]
55%|#####5 | 22.8M/41.5M [05:32<04:03, 80.4kB/s]
55%|#####5 | 22.9M/41.5M [05:33<03:53, 83.7kB/s]
55%|#####5 | 22.9M/41.5M [05:33<03:46, 86.2kB/s]
55%|#####5 | 22.9M/41.5M [05:33<04:44, 68.5kB/s]
55%|#####5 | 22.9M/41.5M [05:33<03:48, 85.4kB/s]
55%|#####5 | 22.9M/41.5M [05:33<03:43, 87.3kB/s]
55%|#####5 | 22.9M/41.5M [05:34<03:39, 88.8kB/s]
55%|#
####5 | 23.0M/41.5M [05:34<03:36, 89.9kB/s]
55%|#####5 | 23.0M/41.5M [05:34<03:34, 90.7kB/s]
55%|#####5 | 23.0M/41.5M [05:34<03:32, 91.3kB/s]
55%|#####5 | 23.0M/41.5M [05:34<03:28, 92.7kB/s]
55%|#####5 | 23.0M/41.5M [05:35<03:28, 92.8kB/s]
56%|#####5 | 23.0M/41.5M [05:35<03:28, 92.7kB/s]
56%|#####5 | 23.1M/41.5M [05:35<03:02, 106kB/s]
56%|#####5 | 23.1M/41.5M [05:35<03:09, 102kB/s]
56%|#####5 | 23.1M/41.5M [05:35<03:14, 99.4kB/s]
56%|#####5 | 23.1M/41.5M [05:36<03:44, 85.8kB/s]
56%|#####5 | 23.1M/41.5M [05:36<02:54, 110kB/s]
56%|#####5 | 23.2M/41.5M [05:36<03:01, 106kB/s]
56%|#####5 | 23.2M/41.5M [05:36<03:08, 102kB/s]
56%|#####5 | 23.2M/41.5M [05:36<03:12, 99.5kB/s]
56%|#####5 | 23.2M/41.5M [05:37<03:16, 97.5kB/s]
56%|#####5 | 23.2M/41.5M [05:37<03:19, 96.0kB/s]
56%|#####6 | 23.2M/41.5M [05:37<03:21, 95.0kB/s]
56%|#####6 | 23.3M/41.5M [05:37<0
3:37, 88.1kB/s]
56%|#####6 | 23.3M/41.5M [05:37<03:33, 89.4kB/s]
56%|#####6 | 23.3M/41.5M [05:37<03:31, 90.3kB/s]
56%|#####6 | 23.3M/41.5M [05:38<03:29, 91.0kB/s]
56%|#####6 | 23.3M/41.5M [05:38<03:28, 91.6kB/s]
56%|#####6 | 23.3M/41.5M [05:38<03:27, 91.9kB/s]
56%|#####6 | 23.4M/41.5M [05:38<03:11, 99.4kB/s]
56%|#####6 | 23.4M/41.5M [05:38<03:15, 97.3kB/s]
56%|#####6 | 23.4M/41.5M [05:38<02:52, 110kB/s]
56%|#####6 | 23.4M/41.5M [05:39<02:39, 119kB/s]
56%|#####6 | 23.4M/41.5M [05:39<02:50, 111kB/s]
57%|#####6 | 23.5M/41.5M [05:39<02:38, 120kB/s]
57%|#####6 | 23.5M/41.5M [05:39<02:30, 126kB/s]
57%|#####6 | 23.5M/41.5M [05:39<02:25, 130kB/s]
57%|#####6 | 23.5M/41.5M [05:40<02:22, 132kB/s]
57%|#####6 | 23.5M/41.5M [05:40<03:02, 103kB/s]
57%|#####6 | 23.6M/41.5M [05:40<02:18, 135kB/s]
57%|#####6 | 23.6M/41.5M [05:40<02:25, 129kB/s]
57%|#####6 |
23.6M/41.5M [05:40<02:23, 130kB/s]
57%|#####6 | 23.6M/41.5M [05:40<02:23, 131kB/s]
57%|#####6 | 23.6M/41.5M [05:41<02:25, 129kB/s]
57%|#####7 | 23.7M/41.5M [05:41<02:21, 132kB/s]
57%|#####7 | 23.7M/41.5M [05:41<02:12, 141kB/s]
57%|#####7 | 23.7M/41.5M [05:41<02:13, 140kB/s]
57%|#####7 | 23.7M/41.5M [05:41<02:13, 139kB/s]
57%|#####7 | 23.8M/41.5M [05:41<02:13, 139kB/s]
57%|#####7 | 23.8M/41.5M [05:42<03:02, 102kB/s]
57%|#####7 | 23.8M/41.5M [05:42<02:01, 153kB/s]
57%|#####7 | 23.8M/41.5M [05:42<02:09, 143kB/s]
58%|#####7 | 23.9M/41.5M [05:42<02:29, 123kB/s]
58%|#####7 | 23.9M/41.5M [05:43<02:34, 119kB/s]
58%|#####7 | 23.9M/41.5M [05:43<02:28, 124kB/s]
58%|#####7 | 23.9M/41.5M [05:43<03:02, 101kB/s]
58%|#####7 | 24.0M/41.5M [05:43<02:11, 140kB/s]
58%|#####7 | 24.0M/41.5M [05:43<02:20, 131kB/s]
58%|#####7 | 24.0M/41.5M [05:44<02:17, 133kB/s]
58%|#
####7 | 24.0M/41.5M [05:44<02:19, 131kB/s]
58%|#####7 | 24.1M/41.5M [05:44<02:05, 145kB/s]
58%|#####8 | 24.1M/41.5M [05:44<02:11, 139kB/s]
58%|#####8 | 24.1M/41.5M [05:44<02:15, 134kB/s]
58%|#####8 | 24.1M/41.5M [05:44<02:14, 136kB/s]
58%|#####8 | 24.1M/41.5M [05:44<02:01, 150kB/s]
58%|#####8 | 24.2M/41.5M [05:45<01:53, 161kB/s]
58%|#####8 | 24.2M/41.5M [05:45<01:58, 153kB/s]
58%|#####8 | 24.2M/41.5M [05:45<02:05, 144kB/s]
58%|#####8 | 24.2M/41.5M [05:45<02:11, 138kB/s]
58%|#####8 | 24.2M/41.5M [05:45<02:10, 138kB/s]
58%|#####8 | 24.3M/41.5M [05:45<01:59, 152kB/s]
59%|#####8 | 24.3M/41.5M [05:45<01:51, 162kB/s]
59%|#####8 | 24.3M/41.5M [05:46<01:56, 154kB/s]
59%|#####8 | 24.3M/41.5M [05:46<01:51, 162kB/s]
59%|#####8 | 24.4M/41.5M [05:46<01:56, 154kB/s]
59%|#####8 | 24.4M/41.5M [05:46<01:51, 162kB/s]
59%|#####8 | 24.4M/41.5M [05:46<01:46, 169kB/s]
59%|#####8 | 24.4M/41.5M [05:46<01:42, 175kB/s]
59%|#####8 | 24.4M/41.5M [05:46<01:41, 177kB/s]
59%|#####8 | 24.5M/41.5M [05:47<01:43, 172kB/s]
59%|#####9 | 24.5M/41.5M [05:47<01:47, 166kB/s]
59%|#####9 | 24.5M/41.5M [05:47<02:17, 129kB/s]
59%|#####9 | 24.6M/41.5M [05:47<01:38, 181kB/s]
59%|#####9 | 24.6M/41.5M [05:47<01:44, 169kB/s]
59%|#####9 | 24.6M/41.5M [05:48<01:49, 162kB/s]
59%|#####9 | 24.7M/41.5M [05:48<01:44, 169kB/s]
59%|#####9 | 24.7M/41.5M [05:48<01:41, 174kB/s]
60%|#####9 | 24.7M/41.5M [05:48<02:21, 124kB/s]
60%|#####9 | 24.7M/41.5M [05:48<01:55, 152kB/s]
60%|#####9 | 24.8M/41.5M [05:49<01:42, 171kB/s]
60%|#####9 | 24.8M/41.5M [05:49<01:48, 162kB/s]
60%|#####9 | 24.8M/41.5M [05:49<01:52, 155kB/s]
60%|#####9 | 24.8M/41.5M [05:49<02:14, 130kB/s]
60%|#####9 | 24.9M/41.5M [05:49<01:57, 148kB/s]
60%|#####9 | 24.9M/41.5M [05:50<02:3
9, 109kB/s]
60%|###### | 24.9M/41.5M [05:50<02:23, 121kB/s]
60%|###### | 24.9M/41.5M [05:50<02:28, 117kB/s]
60%|###### | 25.0M/41.5M [05:50<02:37, 110kB/s]
60%|###### | 25.0M/41.5M [05:50<02:44, 106kB/s]
60%|###### | 25.0M/41.5M [05:51<02:49, 102kB/s]
60%|###### | 25.0M/41.5M [05:51<02:37, 109kB/s]
60%|###### | 25.0M/41.5M [05:51<02:40, 108kB/s]
60%|###### | 25.0M/41.5M [05:51<02:31, 114kB/s]
60%|###### | 25.1M/41.5M [05:51<02:35, 111kB/s]
60%|###### | 25.1M/41.5M [05:52<02:28, 116kB/s]
61%|###### | 25.1M/41.5M [05:52<02:20, 123kB/s]
61%|###### | 25.1M/41.5M [05:52<02:26, 117kB/s]
61%|###### | 25.1M/41.5M [05:52<02:18, 124kB/s]
61%|###### | 25.2M/41.5M [05:52<02:13, 129kB/s]
61%|###### | 25.2M/41.5M [05:52<02:09, 132kB/s]
61%|###### | 25.2M/41.5M [05:53<02:00, 141kB/s]
61%|###### | 25.2M/41.5M [05:53<02:01, 141kB/s]
61%|###### | 25.3M/41.5M
[05:53<01:59, 142kB/s]
61%|###### | 25.3M/41.5M [05:53<02:00, 141kB/s]
61%|######1 | 25.3M/41.5M [05:53<01:49, 155kB/s]
61%|######1 | 25.4M/41.5M [05:53<01:43, 164kB/s]
61%|######1 | 25.4M/41.5M [05:54<01:47, 156kB/s]
61%|######1 | 25.4M/41.5M [05:54<01:42, 165kB/s]
61%|######1 | 25.4M/41.5M [05:54<01:38, 171kB/s]
61%|######1 | 25.5M/41.5M [05:54<01:35, 176kB/s]
61%|######1 | 25.5M/41.5M [05:54<01:33, 179kB/s]
62%|######1 | 25.5M/41.5M [05:55<01:32, 181kB/s]
62%|######1 | 25.6M/41.5M [05:55<01:25, 196kB/s]
62%|######1 | 25.6M/41.5M [05:55<01:20, 207kB/s]
62%|######1 | 25.7M/41.5M [05:55<01:12, 228kB/s]
62%|######1 | 25.7M/41.5M [05:55<01:08, 243kB/s]
62%|######2 | 25.8M/41.5M [05:55<01:01, 268kB/s]
62%|######2 | 25.8M/41.5M [05:56<00:57, 285kB/s]
62%|######2 | 25.9M/41.5M [05:56<00:52, 311kB/s]
63%|######2 | 26.0M/41.5M [05:56<00:45, 356kB/s]
63%|######2 | 2
6.0M/41.5M [05:56<00:53, 304kB/s]
63%|######2 | 26.1M/41.5M [05:56<00:39, 411kB/s]
63%|######3 | 26.2M/41.5M [05:56<00:40, 399kB/s]
63%|######3 | 26.2M/41.5M [05:57<00:42, 376kB/s]
63%|######3 | 26.3M/41.5M [05:57<00:53, 298kB/s]
64%|######3 | 26.4M/41.5M [05:57<00:41, 386kB/s]
64%|######3 | 26.4M/41.5M [05:57<00:44, 358kB/s]
64%|######3 | 26.5M/41.5M [05:58<00:57, 274kB/s]
64%|######4 | 26.6M/41.5M [05:58<00:47, 329kB/s]
64%|######4 | 26.6M/41.5M [05:58<00:51, 305kB/s]
64%|######4 | 26.7M/41.5M [05:58<00:52, 298kB/s]
64%|######4 | 26.7M/41.5M [05:58<00:55, 280kB/s]
64%|######4 | 26.8M/41.5M [05:59<00:58, 266kB/s]
65%|######4 | 26.8M/41.5M [05:59<00:57, 270kB/s]
65%|######4 | 26.8M/41.5M [05:59<00:56, 272kB/s]
65%|######4 | 26.9M/41.5M [05:59<00:58, 260kB/s]
65%|######4 | 26.9M/41.5M [05:59<00:57, 266kB/s]
65%|######5 | 27.0M/41.5M [05:59<00:56, 269kB/s]
65%|##
####5 | 27.0M/41.5M [06:00<00:55, 272kB/s]
65%|######5 | 27.1M/41.5M [06:00<00:58, 260kB/s]
65%|######5 | 27.1M/41.5M [06:00<00:56, 266kB/s]
65%|######5 | 27.2M/41.5M [06:00<00:55, 269kB/s]
66%|######5 | 27.2M/41.5M [06:00<00:55, 272kB/s]
66%|######5 | 27.2M/41.5M [06:01<00:54, 274kB/s]
66%|######5 | 27.3M/41.5M [06:01<00:51, 289kB/s]
66%|######5 | 27.4M/41.5M [06:01<00:51, 286kB/s]
66%|######6 | 27.4M/41.5M [06:01<00:52, 284kB/s]
66%|######6 | 27.5M/41.5M [06:01<00:49, 296kB/s]
66%|######6 | 27.5M/41.5M [06:01<00:50, 291kB/s]
66%|######6 | 27.6M/41.5M [06:02<00:48, 301kB/s]
67%|######6 | 27.6M/41.5M [06:02<00:47, 308kB/s]
67%|######6 | 27.7M/41.5M [06:02<00:48, 299kB/s]
67%|######6 | 27.7M/41.5M [06:02<00:47, 307kB/s]
67%|######6 | 27.8M/41.5M [06:02<00:46, 312kB/s]
67%|######7 | 27.8M/41.5M [06:02<00:43, 330kB/s]
67%|######7 | 27.9M/41.5M [06:03<00:43, 328kB/s]
67%|######7 | 27.9M/41.5M [06:03<00:43, 327kB/s]
67%|######7 | 28.0M/41.5M [06:03<00:41, 340kB/s]
68%|######7 | 28.1M/41.5M [06:03<00:41, 336kB/s]
68%|######7 | 28.1M/41.5M [06:03<00:40, 346kB/s]
68%|######7 | 28.2M/41.5M [06:04<00:39, 354kB/s]
68%|######8 | 28.2M/41.5M [06:04<00:38, 359kB/s]
68%|######8 | 28.3M/41.5M [06:04<00:35, 390kB/s]
68%|######8 | 28.4M/41.5M [06:04<00:34, 399kB/s]
69%|######8 | 28.5M/41.5M [06:04<00:32, 418kB/s]
69%|######8 | 28.6M/41.5M [06:04<00:30, 446kB/s]
69%|######9 | 28.6M/41.5M [06:05<00:28, 465kB/s]
69%|######9 | 28.7M/41.5M [06:05<00:27, 492kB/s]
70%|######9 | 28.8M/41.5M [06:05<00:25, 526kB/s]
70%|######9 | 28.9M/41.5M [06:05<00:23, 563kB/s]
70%|####### | 29.1M/41.5M [06:05<00:21, 602kB/s]
70%|####### | 29.2M/41.5M [06:05<00:20, 631kB/s]
71%|####### | 29.3M/41.5M [06:06<00:18, 678kB/s]
71%|####### | 29.4M/41.5M [06:06<00:16
, 756kB/s]
71%|#######1 | 29.6M/41.5M [06:06<00:15, 829kB/s]
72%|#######1 | 29.7M/41.5M [06:06<00:14, 854kB/s]
72%|#######1 | 29.8M/41.5M [06:06<00:14, 824kB/s]
72%|#######2 | 29.9M/41.5M [06:06<00:14, 856kB/s]
72%|#######2 | 30.1M/41.5M [06:06<00:12, 962kB/s]
73%|#######2 | 30.2M/41.5M [06:07<00:16, 706kB/s]
73%|#######3 | 30.4M/41.5M [06:07<00:11, 1.00MB/s]
74%|#######3 | 30.5M/41.5M [06:07<00:11, 972kB/s]
74%|#######3 | 30.7M/41.5M [06:07<00:11, 1.00MB/s]
74%|#######4 | 30.8M/41.5M [06:07<00:16, 703kB/s]
75%|#######4 | 30.9M/41.5M [06:08<00:13, 819kB/s]
75%|#######4 | 31.1M/41.5M [06:08<00:14, 770kB/s]
75%|#######5 | 31.2M/41.5M [06:08<00:14, 735kB/s]
75%|#######5 | 31.3M/41.5M [06:08<00:14, 756kB/s]
76%|#######5 | 31.4M/41.5M [06:08<00:13, 783kB/s]
76%|#######5 | 31.5M/41.5M [06:08<00:17, 591kB/s]
76%|#######6 | 31.6M/41.5M [06:09<00:14, 715kB/s]
76%|#######6 | 31.7M/41.
5M [06:09<00:14, 693kB/s]
77%|#######6 | 31.8M/41.5M [06:09<00:16, 635kB/s]
77%|#######6 | 31.9M/41.5M [06:09<00:16, 610kB/s]
77%|#######7 | 32.0M/41.5M [06:09<00:16, 594kB/s]
77%|#######7 | 32.1M/41.5M [06:09<00:15, 627kB/s]
77%|#######7 | 32.1M/41.5M [06:10<00:16, 605kB/s]
78%|#######7 | 32.2M/41.5M [06:10<00:18, 527kB/s]
78%|#######7 | 32.3M/41.5M [06:10<00:17, 557kB/s]
78%|#######7 | 32.4M/41.5M [06:10<00:16, 592kB/s]
78%|#######8 | 32.4M/41.5M [06:10<00:15, 630kB/s]
78%|#######8 | 32.5M/41.5M [06:10<00:15, 606kB/s]
79%|#######8 | 32.6M/41.5M [06:10<00:16, 562kB/s]
79%|#######8 | 32.7M/41.5M [06:11<00:15, 598kB/s]
79%|#######8 | 32.8M/41.5M [06:11<00:14, 637kB/s]
79%|#######9 | 32.8M/41.5M [06:11<00:14, 609kB/s]
79%|#######9 | 32.9M/41.5M [06:11<00:16, 563kB/s]
79%|#######9 | 33.0M/41.5M [06:11<00:14, 600kB/s]
80%|#######9 | 33.1M/41.5M [06:11<00:13, 639kB/s]
80%|#######9
| 33.1M/41.5M [06:11<00:14, 609kB/s]
80%|#######9 | 33.2M/41.5M [06:11<00:15, 563kB/s]
80%|######## | 33.3M/41.5M [06:12<00:14, 600kB/s]
80%|######## | 33.4M/41.5M [06:12<00:13, 640kB/s]
81%|######## | 33.4M/41.5M [06:12<00:13, 609kB/s]
81%|######## | 33.5M/41.5M [06:12<00:14, 563kB/s]
81%|######## | 33.6M/41.5M [06:12<00:13, 618kB/s]
81%|########1 | 33.7M/41.5M [06:12<00:12, 652kB/s]
81%|########1 | 33.7M/41.5M [06:12<00:13, 619kB/s]
81%|########1 | 33.8M/41.5M [06:13<00:14, 569kB/s]
82%|########1 | 33.9M/41.5M [06:13<00:13, 605kB/s]
82%|########1 | 34.0M/41.5M [06:13<00:12, 642kB/s]
82%|########2 | 34.0M/41.5M [06:13<00:12, 612kB/s]
82%|########2 | 34.1M/41.5M [06:13<00:13, 565kB/s]
82%|########2 | 34.2M/41.5M [06:13<00:12, 601kB/s]
83%|########2 | 34.3M/41.5M [06:13<00:11, 641kB/s]
83%|########2 | 34.3M/41.5M [06:13<00:12, 610kB/s]
83%|########2 | 34.4M/41.5M [06:14<00:12, 582kB/s]
83%
|########3 | 34.5M/41.5M [06:14<00:11, 628kB/s]
83%|########3 | 34.6M/41.5M [06:14<00:10, 663kB/s]
84%|########3 | 34.7M/41.5M [06:14<00:11, 645kB/s]
84%|########3 | 34.7M/41.5M [06:14<00:12, 590kB/s]
84%|########3 | 34.8M/41.5M [06:14<00:11, 635kB/s]
84%|########4 | 34.9M/41.5M [06:14<00:10, 640kB/s]
84%|########4 | 35.0M/41.5M [06:15<00:09, 701kB/s]
85%|########4 | 35.1M/41.5M [06:15<00:09, 673kB/s]
85%|########4 | 35.2M/41.5M [06:15<00:10, 629kB/s]
85%|########5 | 35.3M/41.5M [06:15<00:09, 680kB/s]
85%|########5 | 35.4M/41.5M [06:15<00:12, 502kB/s]
86%|########5 | 35.5M/41.5M [06:15<00:09, 688kB/s]
86%|########5 | 35.6M/41.5M [06:15<00:09, 672kB/s]
86%|########6 | 35.7M/41.5M [06:16<00:09, 672kB/s]
86%|########6 | 35.8M/41.5M [06:16<00:09, 652kB/s]
86%|########6 | 35.8M/41.5M [06:16<00:10, 570kB/s]
87%|########6 | 35.9M/41.5M [06:16<00:10, 577kB/s]
87%|########6 | 36.0M/41.5M [06:16<00:09, 637kB/
s]
87%|########7 | 36.1M/41.5M [06:16<00:08, 665kB/s]
87%|########7 | 36.2M/41.5M [06:16<00:08, 646kB/s]
87%|########7 | 36.2M/41.5M [06:17<00:09, 593kB/s]
88%|########7 | 36.3M/41.5M [06:17<00:08, 615kB/s]
88%|########7 | 36.5M/41.5M [06:17<00:08, 642kB/s]
88%|########8 | 36.6M/41.5M [06:17<00:07, 660kB/s]
88%|########8 | 36.7M/41.5M [06:17<00:07, 671kB/s]
89%|########8 | 36.8M/41.5M [06:17<00:06, 733kB/s]
89%|########8 | 36.9M/41.5M [06:18<00:08, 560kB/s]
89%|########9 | 37.0M/41.5M [06:18<00:06, 699kB/s]
89%|########9 | 37.1M/41.5M [06:18<00:07, 641kB/s]
90%|########9 | 37.2M/41.5M [06:18<00:07, 613kB/s]
90%|########9 | 37.3M/41.5M [06:18<00:07, 584kB/s]
90%|######### | 37.4M/41.5M [06:19<00:07, 576kB/s]
90%|######### | 37.5M/41.5M [06:19<00:07, 584kB/s]
91%|######### | 37.6M/41.5M [06:19<00:07, 576kB/s]
91%|######### | 37.7M/41.5M [06:19<00:06, 584kB/s]
91%|#########1| 37.8M/41.5M [06:19<00
:06, 590kB/s]
91%|#########1| 37.9M/41.5M [06:19<00:06, 594kB/s]
92%|#########1| 38.0M/41.5M [06:20<00:06, 596kB/s]
92%|#########1| 38.1M/41.5M [06:20<00:07, 494kB/s]
92%|#########2| 38.2M/41.5M [06:20<00:05, 616kB/s]
92%|#########2| 38.3M/41.5M [06:20<00:06, 558kB/s]
92%|#########2| 38.3M/41.5M [06:20<00:06, 530kB/s]
93%|#########2| 38.4M/41.5M [06:20<00:06, 521kB/s]
93%|#########2| 38.5M/41.5M [06:21<00:05, 520kB/s]
93%|#########3| 38.6M/41.5M [06:21<00:05, 517kB/s]
93%|#########3| 38.7M/41.5M [06:21<00:05, 529kB/s]
93%|#########3| 38.8M/41.5M [06:21<00:05, 537kB/s]
94%|#########3| 38.9M/41.5M [06:21<00:05, 543kB/s]
94%|#########3| 39.0M/41.5M [06:22<00:04, 547kB/s]
94%|#########4| 39.1M/41.5M [06:22<00:04, 550kB/s]
94%|#########4| 39.2M/41.5M [06:22<00:04, 566kB/s]
95%|#########4| 39.3M/41.5M [06:22<00:04, 577kB/s]
95%|#########4| 39.4M/41.5M [06:22<00:03, 585kB/s]
95%|#########5| 39.5M/41.5
M [06:22<00:03, 590kB/s]
95%|#########5| 39.6M/41.5M [06:23<00:03, 594kB/s]
96%|#########5| 39.7M/41.5M [06:23<00:03, 611kB/s]
96%|#########5| 39.8M/41.5M [06:23<00:02, 608kB/s]
96%|#########6| 39.9M/41.5M [06:23<00:02, 607kB/s]
96%|#########6| 40.0M/41.5M [06:23<00:02, 606kB/s]
97%|#########6| 40.1M/41.5M [06:23<00:02, 619kB/s]
97%|#########6| 40.2M/41.5M [06:24<00:02, 614kB/s]
97%|#########7| 40.3M/41.5M [06:24<00:02, 611kB/s]
97%|#########7| 40.4M/41.5M [06:24<00:01, 608kB/s]
98%|#########7| 40.5M/41.5M [06:24<00:01, 607kB/s]
98%|#########7| 40.6M/41.5M [06:24<00:01, 620kB/s]
98%|#########8| 40.7M/41.5M [06:25<00:01, 615kB/s]
98%|#########8| 40.8M/41.5M [06:25<00:01, 611kB/s]
99%|#########8| 40.9M/41.5M [06:25<00:00, 623kB/s]
99%|#########8| 41.0M/41.5M [06:25<00:00, 617kB/s]
99%|#########9| 41.1M/41.5M [06:25<00:00, 613kB/s]
99%|#########9| 41.2M/41.5M [06:25<00:00, 624kB/s]
100%|#########9|
41.3M/41.5M [06:26<00:00, 512kB/s]
100%|#########9| 41.4M/41.5M [06:26<00:00, 604kB/s]
100%|##########| 41.5M/41.5M [06:26<00:00, 113kB/s]
@@ -283,6 +283,11 @@ Look up prediction top 1 index in 1000 class synset.
+.. rst-class:: sphx-glr-timing
+
+ **Total running time of the script:** ( 6 minutes 50.106 seconds)
+
+
.. _sphx_glr_download_how_to_compile_models_from_oneflow.py:
diff --git a/docs/_sources/how_to/compile_models/from_paddle.rst.txt b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
index be1dffda6..681b21e4a 100644
--- a/docs/_sources/how_to/compile_models/from_paddle.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
@@ -201,7 +201,7 @@ Look up prediction top 1 index in 1000 class synset.
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 1 minutes 23.283 seconds)
+ **Total running time of the script:** ( 1 minutes 4.115 seconds)
.. _sphx_glr_download_how_to_compile_models_from_paddle.py:
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 7cb90a696..9de3c9a8f 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -79,7 +79,7 @@ Load a pretrained PyTorch model
.. code-block:: none
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
0%| | 0.00/44.7M [00:00<?, ?B/s]
10%|# | 4.61M/44.7M [00:00<00:00, 48.2MB/s]
42%|####2 | 18.8M/44.7M [00:00<00:00, 107MB/s]
73%|#######3 | 32.8M/44.7M [00:00<00:00, 125MB/s]
100%|##########| 44.7M/44.7M [00:00<00:00, 128MB/s]
+
0%| | 0.00/44.7M [00:00<?, ?B/s]
53%|#####2 | 23.6M/44.7M [00:00<00:00, 248MB/s]
100%|##########| 44.7M/44.7M [00:00<00:00, 272MB/s]
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 1f99017ad..43d9b260b 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -370,11 +370,6 @@ Run the corresponding model on tensorflow
-.. rst-class:: sphx-glr-timing
-
- **Total running time of the script:** ( 1 minutes 2.150 seconds)
-
-
.. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 3be9b8044..f710f77c7 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,15 +5,15 @@
Computation times
=================
-**05:35.207** total execution time for **how_to_compile_models** files:
+**11:30.048** total execution time for **how_to_compile_models** files:
-- **01:23.283**: :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)
-- **01:02.150**: :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``)
-- **00:55.986**: :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)
-- **00:31.428**: :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)
-- **00:24.838**: :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)
-- **00:21.133**: :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)
-- **00:20.815**: :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)
-- **00:19.230**: :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)
-- **00:13.473**: :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)
-- **00:02.871**: :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)
+- **06:50.106**: :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)
+- **01:04.115**: :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)
+- **00:59.156**: :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``)
+- **00:56.570**: :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)
+- **00:24.524**: :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)
+- **00:20.600**: :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)
+- **00:20.526**: :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)
+- **00:19.185**: :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)
+- **00:12.821**: :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)
+- **00:02.445**: :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 34e8baa66..0d532ebad 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -393,7 +393,7 @@ Execute on TVM
Evaluate inference time cost...
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 16.1288 16.1018 16.3555 16.0681 0.0847
+ 15.8762 15.5255 16.7675 15.4631 0.4818
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 9eb2b8fbf..aaf6b010b 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -108,7 +108,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
.. code-block:: none
Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
0%| | 0.00/170M [00:00<?, ?B/s]
1%|1 | 1.94M/170M [00:00<00:08, 20.0MB/s]
2%|2 | 3.84M/170M [00:00<00:09, 17.7MB/s]
4%|3 | 6.73M/170M [00:00<00:07, 23.1MB/s]
5%|5 | 9.01M/170M [00:00<00:07, 23.1MB/s]
7%|6 | 11.9M/170M [00:00<00:06, 25.4MB/s]
8%|8 | 14.3M/170M [00:00<00:06, 25.0MB/s]
10%|9 | 16.9M/170M [00:00<00:06, 25.6MB/s]
12%|#1 | 20.1M/170M [00:00<00:05, 28.0MB/s]
13%|#3 | 22.8M/170M [00:01<00:06, 22.9MB/s]
15%|#4 | 25.1M/170M [00:01<00:07, 21.3MB/s]
16%|#6 | 27.5M/170M [00:01<00:06, 22.0MB/s]
17%|#7 | 29.7M/170M [00:01<00:06, 21.7MB/s]
20%|#9 | 33.4M/170M [00:01<00:05, 26.2MB/s]
21%|##1 | 36.1M/170M [00:01<00:05, 27.0MB/s]
23%|##2 | 38.8M/170M [00:01<00:05, 26.5MB/s]
25%|##4 | 42.4M/170M [00:01<00:04, 29.4MB/s]
28%|##7 | 47.1M/170M [00:01<00:03, 35.2MB/
s]
30%|##9 | 50.7M/170M [00:01<00:03, 35.5MB/s]
32%|###2 | 55.1M/170M [00:02<00:03, 38.8MB/s]
35%|###4 | 58.9M/170M [00:02<00:03, 35.9MB/s]
37%|###6 | 62.4M/170M [00:02<00:03, 31.1MB/s]
39%|###8 | 65.5M/170M [00:02<00:03, 31.3MB/s]
40%|#### | 68.5M/170M [00:02<00:03, 29.8MB/s]
42%|####2 | 72.1M/170M [00:02<00:03, 31.7MB/s]
44%|####4 | 75.2M/170M [00:02<00:03, 26.7MB/s]
46%|####5 | 77.9M/170M [00:03<00:04, 22.9MB/s]
48%|####8 | 82.3M/170M [00:03<00:03, 27.9MB/s]
51%|##### | 86.0M/170M [00:03<00:02, 30.6MB/s]
52%|#####2 | 89.2M/170M [00:03<00:03, 28.1MB/s]
55%|#####4 | 92.7M/170M [00:03<00:02, 29.5MB/s]
56%|#####6 | 95.6M/170M [00:03<00:03, 25.3MB/s]
58%|#####8 | 98.7M/170M [00:03<00:02, 26.4MB/s]
60%|#####9 | 102M/170M [00:03<00:02, 27.7MB/s]
62%|######2 | 106M/170M [00:03<00:02, 30.6MB/s]
65%|######4 | 110M/170M [00:04<00:01
, 32.0MB/s]
67%|######6 | 113M/170M [00:04<00:01, 33.6MB/s]
69%|######8 | 117M/170M [00:04<00:01, 35.2MB/s]
71%|####### | 120M/170M [00:04<00:01, 35.2MB/s]
73%|#######3 | 125M/170M [00:04<00:01, 37.7MB/s]
76%|#######5 | 128M/170M [00:04<00:01, 29.8MB/s]
77%|#######7 | 131M/170M [00:04<00:01, 27.2MB/s]
80%|######## | 136M/170M [00:04<00:01, 33.1MB/s]
83%|########2 | 140M/170M [00:05<00:00, 34.5MB/s]
85%|########4 | 144M/170M [00:05<00:00, 32.5MB/s]
87%|########6 | 147M/170M [00:05<00:00, 27.3MB/s]
88%|########8 | 150M/170M [00:05<00:00, 25.0MB/s]
90%|########9 | 152M/170M [00:05<00:00, 25.0MB/s]
91%|#########1| 155M/170M [00:05<00:00, 24.2MB/s]
93%|#########2| 158M/170M [00:05<00:00, 26.2MB/s]
95%|#########4| 161M/170M [00:05<00:00, 24.7MB/s]
96%|#########5| 163M/170M [00:06<00:00, 24.3MB/s]
97%|#########7| 165M/170M [00:06<00:00, 24.0MB/s]
99%|#########8| 168M/170M [00:06<00:00, 22.8M
B/s]
100%|##########| 170M/170M [00:06<00:00, 27.9MB/s]
+
0%| | 0.00/170M [00:00<?, ?B/s]
11%|#1 | 19.3M/170M [00:00<00:00, 203MB/s]
27%|##6 | 45.8M/170M [00:00<00:00, 246MB/s]
42%|####2 | 72.1M/170M [00:00<00:00, 260MB/s]
58%|#####7 | 98.3M/170M [00:00<00:00, 266MB/s]
73%|#######3 | 124M/170M [00:00<00:00, 269MB/s]
88%|########8 | 150M/170M [00:00<00:00, 269MB/s]
100%|##########| 170M/170M [00:00<00:00, 264MB/s]
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
for i in range(dim)
/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -253,7 +253,7 @@ Get boxes with score larger than 0.9
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 3 minutes 10.453 seconds)
+ **Total running time of the script:** ( 2 minutes 56.175 seconds)
.. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index b63f1ae82..fad356b93 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -187,7 +187,7 @@ training. Other models require a full post training calibration.
.. code-block:: none
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
0%| | 0.00/13.6M [00:00<?, ?B/s]
10%|# | 1.38M/13.6M [00:00<00:00, 14.5MB/s]
33%|###3 | 4.54M/13.6M [00:00<00:00, 25.4MB/s]
51%|#####1 | 6.97M/13.6M [00:00<00:00, 24.1MB/s]
73%|#######3 | 9.90M/13.6M [00:00<00:00, 25.3MB/s]
91%|######### | 12.3M/13.6M [00:00<00:00, 24.2MB/s]
100%|##########| 13.6M/13.6M [00:00<00:00, 23.7MB/s]
+
0%| | 0.00/13.6M [00:00<?, ?B/s]
100%|##########| 13.6M/13.6M [00:00<00:00, 167MB/s]
@@ -344,7 +344,7 @@ Here we give an example of how to measure performance of TVM compiled models.
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 90.2412 90.2071 90.8011 90.0909 0.1241
+ 90.1441 90.0327 91.7263 89.8729 0.2893
@@ -384,7 +384,7 @@ TODO
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 1 minutes 5.816 seconds)
+ **Total running time of the script:** ( 1 minutes 3.230 seconds)
.. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 847d33e8a..323045be1 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -351,7 +351,7 @@ Here we give an example of how to measure performance of TVM compiled models.
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 117.0151 116.9941 119.1223 115.3432 0.9252
+ 117.5831 117.4965 119.9891 116.6112 0.6789
@@ -385,7 +385,7 @@ Here we give an example of how to measure performance of TVM compiled models.
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 1 minutes 58.607 seconds)
+ **Total running time of the script:** ( 1 minutes 55.881 seconds)
.. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 13df07cbd..add212834 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -221,7 +221,7 @@ We create a Relay VM to build and execute the model.
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 2 minutes 21.769 seconds)
+ **Total running time of the script:** ( 1 minutes 7.308 seconds)
.. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 1ec8f3c79..8723cf707 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -137,7 +137,7 @@ Convert and compile model for CPU.
data: None
input_sym_arg_type = in_param.infer_type()[0]
Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
0%| | 0/132723 [00:00<?, ?KB/s]
5%|5 | 6999/132723 [00:00<00:01, 69969.38KB/s]
12%|#1 | 15727/132723 [00:00<00:01, 80147.99KB/s]
19%|#8 | 24560/132723 [00:00<00:01, 83881.04KB/s]
25%|##4 | 32949/132723 [00:00<00:01, 80221.28KB/s]
31%|###1 | 41794/132723 [00:00<00:01, 83102.74KB/s]
38%|###8 | 50653/132723 [00:00<00:00, 84931.94KB/s]
45%|####4 | 59479/132723 [00:00<00:00, 86003.43KB/s]
52%|#####1 | 68362/132723 [00:00<00:00, 86889.43KB/s]
58%|#####8 | 77304/132723 [00:00<00:00, 87674.86KB/s]
65%|######4 | 86187/132723 [00:01<00:00, 88027.99KB/s]
72%|#######1 | 95042/132723 [00:01<00:00, 88183.97KB/s]
78%|#######8 | 103982/132723 [00:01<00:00, 88551.68KB/s]
85%|########5 | 112918/132723 [00:01<00:00, 88793.65KB/s]
92%|#########1| 121800/132723 [00:01<00:00, 88484.18KB/s]
98%|#########8| 130681/132723 [00:01<00:00, 88579.50KB/s]
100%|#######
###| 132723/132723 [00:01<00:00, 86307.82KB/s]
+
0%| | 0/132723 [00:00<?, ?KB/s]
5%|5 | 6757/132723 [00:00<00:01, 67559.27KB/s]
12%|#1 | 15560/132723 [00:00<00:01, 79595.33KB/s]
18%|#8 | 24463/132723 [00:00<00:01, 83901.77KB/s]
25%|##5 | 33310/132723 [00:00<00:01, 85702.78KB/s]
32%|###1 | 42202/132723 [00:00<00:01, 86861.86KB/s]
38%|###8 | 51061/132723 [00:00<00:00, 87447.02KB/s]
45%|####5 | 59862/132723 [00:00<00:00, 87630.07KB/s]
52%|#####1 | 68626/132723 [00:00<00:00, 87538.91KB/s]
58%|#####8 | 77439/132723 [00:00<00:00, 87717.76KB/s]
65%|######4 | 86267/132723 [00:01<00:00, 87889.71KB/s]
72%|#######1 | 95173/132723 [00:01<00:00, 88246.73KB/s]
78%|#######8 | 104014/132723 [00:01<00:00, 88287.51KB/s]
85%|########5 | 112862/132723 [00:01<00:00, 88340.22KB/s]
92%|#########1| 121697/132723 [00:01<00:00, 88298.19KB/s]
98%|#########8| 130598/132723 [00:01<00:00, 88509.55KB/s]
100%|#######
###| 132723/132723 [00:01<00:00, 86935.39KB/s]
@@ -202,7 +202,7 @@ Display result
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 2 minutes 17.852 seconds)
+ **Total running time of the script:** ( 2 minutes 17.619 seconds)
.. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index aa8771b2f..81cf31b98 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
Computation times
=================
-**11:45.672** total execution time for **how_to_deploy_models** files:
+**10:09.203** total execution time for **how_to_deploy_models** files:
-- **03:10.453**: :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
-- **02:21.769**: :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)
-- **02:17.852**: :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
-- **01:58.607**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
-- **01:05.816**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)
-- **00:28.562**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)
-- **00:22.433**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
-- **00:00.181**: :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)
+- **02:56.175**: :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
+- **02:17.619**: :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
+- **01:55.881**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
+- **01:07.308**: :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)
+- **01:03.230**: :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)
+- **00:27.226**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)
+- **00:21.587**: :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
+- **00:00.176**: :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 6885583b1..a3b9239d8 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -423,7 +423,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
.. code-block:: none
- Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip094144d5-fdd2-4b2b-9231-75f8c8ece3c7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+ Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip7d4ccf53-5f97-4de0-9c7a-502154829388 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
@@ -525,7 +525,7 @@ Now, to actually convert the entire network, we have written `a pass in Relay <h
.. code-block:: none
- Check failed: (lower) is false: Intrinsic lowering function for target llvm, intrinsic name tir.sqrt, type 150 not found
+ Check failed: (lower) is false: FloatImm lowering function for target llvm type 150 not found
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 8ed00780a..b60d3f7ae 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,9 +5,9 @@
Computation times
=================
-**00:37.308** total execution time for **how_to_extend_tvm** files:
+**00:36.948** total execution time for **how_to_extend_tvm** files:
-- **00:33.934**: :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
-- **00:02.178**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)
-- **00:01.008**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)
-- **00:00.187**: :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)
+- **00:33.587**: :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
+- **00:02.171**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)
+- **00:01.009**: :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)
+- **00:00.181**: :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 55981273a..d6c79f29f 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -199,10 +199,10 @@ profile the execution time of each passes.
.. code-block:: none
Printing results of timing profile...
- InferType: 5844us [5844us] (45.16%; 45.16%)
- FoldScaleAxis: 7098us [2us] (54.84%; 54.84%)
- FoldConstant: 7096us [1481us] (54.83%; 99.97%)
- InferType: 5615us [5615us] (43.39%; 79.13%)
+ InferType: 6108us [6108us] (45.64%; 45.64%)
+ FoldScaleAxis: 7276us [2us] (54.36%; 54.36%)
+ FoldConstant: 7273us [1522us] (54.35%; 99.97%)
+ InferType: 5751us [5751us] (42.97%; 79.07%)
@@ -239,10 +239,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
.. code-block:: none
Printing results of timing profile...
- InferType: 5678us [5678us] (44.65%; 44.65%)
- FoldScaleAxis: 7040us [2us] (55.35%; 55.35%)
- FoldConstant: 7038us [1455us] (55.34%; 99.98%)
- InferType: 5583us [5583us] (43.90%; 79.32%)
+ InferType: 5810us [5810us] (44.54%; 44.54%)
+ FoldScaleAxis: 7233us [2us] (55.46%; 55.46%)
+ FoldConstant: 7232us [1511us] (55.44%; 99.98%)
+ InferType: 5721us [5721us] (43.86%; 79.11%)
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 80660e445..91faf28bf 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -295,7 +295,7 @@ latency of convolution.
.. code-block:: none
- Convolution: 35.339016 ms
+ Convolution: 40.653210 ms
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 3350e6edb..ab3720b96 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -628,7 +628,7 @@ be able to run on our build server
.. code-block:: none
- conv2d with tensor core: 8.956960 ms
+ conv2d with tensor core: 10.205289 ms
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index b85cc644c..a580839ab 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -118,8 +118,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
.. code-block:: none
- Numpy running time: 0.018245
- Baseline: 3.306084
+ Numpy running time: 0.017612
+ Baseline: 3.390872
@@ -210,7 +210,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
.. code-block:: none
- Opt1: 0.289051
+ Opt1: 0.307004
@@ -309,7 +309,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
.. code-block:: none
- Opt2: 0.329916
+ Opt2: 0.338097
@@ -401,7 +401,7 @@ the access pattern for A matrix is more cache friendly.
.. code-block:: none
- Opt3: 0.117861
+ Opt3: 0.112429
@@ -520,7 +520,7 @@ flattening.
.. code-block:: none
- Opt4: 0.110710
+ Opt4: 0.110156
@@ -638,7 +638,7 @@ write to C when all the block results are ready.
.. code-block:: none
- Opt5: 0.111893
+ Opt5: 0.111282
@@ -759,7 +759,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
.. code-block:: none
- Opt6: 0.146405
+ Opt6: 0.144558
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 9277c2f4b..e0b14a4fc 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
Computation times
=================
-**00:34.502** total execution time for **how_to_optimize_operators** files:
+**00:34.835** total execution time for **how_to_optimize_operators** files:
-- **00:31.909**: :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)
-- **00:01.424**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
-- **00:01.169**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)
+- **00:32.206**: :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)
+- **00:01.432**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
+- **00:01.198**: :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 8db986d25..c6b9dbb1e 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
Computation times
=================
-**05:00.449** total execution time for **how_to_tune_with_autoscheduler** files:
-
-- **02:28.084**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
-- **01:18.796**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)
-- **00:39.963**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)
-- **00:17.032**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)
-- **00:08.373**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)
-- **00:08.201**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)
+**04:50.844** total execution time for **how_to_tune_with_autoscheduler** files:
+
+- **02:19.891**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
+- **01:17.690**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)
+- **00:39.503**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)
+- **00:16.807**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)
+- **00:08.768**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)
+- **00:08.185**: :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index e5456e9c1..0a2d3d624 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -223,11 +223,11 @@ cooperative fetching, unrolling and operator fusion.
buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 32;
- allocate(conv2d_nchw: Pointer(local float32), float32, [16]), storage_scope = local;
- allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
- allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
- attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
- conv2d_nchw_1: Buffer(conv2d_nchw, float32, [16], [], scope="local", align=64)[0] = 0f32
+ allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+ allocate(pad_temp.shared: Pointer(shared float32), float32, [1568]), storage_scope = shared;
+ allocate(kernel.shared: Pointer(shared float32), float32, [512]), storage_scope = shared;
+ attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+ conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
conv2d_nchw_1[1] = 0f32
conv2d_nchw_1[2] = 0f32
conv2d_nchw_1[3] = 0f32
@@ -241,943 +241,74 @@ cooperative fetching, unrolling and operator fusion.
conv2d_nchw_1[11] = 0f32
conv2d_nchw_1[12] = 0f32
conv2d_nchw_1[13] = 0f32
- conv2d_nchw_1[14] = 0f32
- conv2d_nchw_1[15] = 0f32
for (rc.outer.outer: int32, 0, 16) {
- for (rx.outer.outer: int32, 0, 3) {
- let cse_var_2: int32 = (rc.outer.outer*1568)
- let cse_var_1: int32 = (rc.outer.outer*288)
- {
- attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 49)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 7), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtyp [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 14), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dty [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 147)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 21), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 28), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 245)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 35), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 42), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 343)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 49), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 56), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 441)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 335)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 70), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 539)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 77), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 84), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 637)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 91), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 98), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dt [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 735)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 105), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 112), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 833)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 119), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 678)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 931)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 133), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 140), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, d [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1029)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 147), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 154), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1127)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 161), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 168), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1225)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 175), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 182), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1323)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1021)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 196), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1421)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 203), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 210), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1519)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 217), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 224), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1617)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 231), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 238), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1715)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 245), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1364)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1813)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 259), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 266), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1911)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 273), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 280), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- if @tir.likely((threadIdx.x_1 < 7), dtype=bool) {
- pad_temp.shared_1[(threadIdx.x_1 + 2009)] = 0f32
- }
- attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope="shared")[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 49)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 49), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 49), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 98)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 98), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 2), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 147)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 147), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 51), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 196)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 196), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 4), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 245)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 245), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 53), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 294)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 294), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 6), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 343)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 343), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 55), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 392), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 8), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 441)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 441), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 57), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 490)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 490), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 10), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 539)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 539), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 59), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 588)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 588), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 12), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 637)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 637), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 61), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 686)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 686), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 14), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 735)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 735), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 63), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 16), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 833)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 833), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 65), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 882)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 882), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 18), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 931)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 931), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 67), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 980)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 980), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 20), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1029)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1029), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 69), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1078)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1078), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 22), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1127)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1127), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 71), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1176), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 24), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1225)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1225), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 73), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1274)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1274), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 26), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1323)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1323), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 75), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1372)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1372), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 28), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1421)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1421), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 77), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1470)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1470), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 30), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- if @tir.likely((threadIdx.x_2 < 17), dtype=bool) {
- kernel.shared_1[(threadIdx.x_2 + 1519)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1519), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 79), 96)*3)) + rx.outer.outer)]
- }
- for (rc.outer.inner: int32, 0, 2) {
- let cse_var_3: int32 = (rc.outer.inner*48)
- {
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[cse_var_3]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 96)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 192)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 288)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 97)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 193)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 289)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 2)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 98)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 194)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 290)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 3)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 99)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 195)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 291)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 4)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 100)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 196)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 292)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 5)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 101)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 197)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 293)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 6)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 102)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 198)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 294)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 7)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 103)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 199)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 295)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 8)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 104)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 200)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 296)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 9)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 105)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 201)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 297)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 10)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 106)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 202)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 298)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 11)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 107)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 203)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 299)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 12)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 108)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 204)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 300)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 13)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 109)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 205)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 301)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 14)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 110)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 206)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 302)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 15)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 111)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 207)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 303)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 16)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 112)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 208)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 304)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 17)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 113)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 209)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 305)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 18)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 114)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 210)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 306)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 19)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 115)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 211)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 307)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 20)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 116)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 212)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 308)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 21)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 117)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 213)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 309)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 22)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 118)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 214)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 310)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 23)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 119)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 215)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 311)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 24)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 120)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 216)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 312)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 25)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 121)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 217)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 313)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 26)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 122)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 218)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 314)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 27)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 123)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 219)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 315)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 28)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 124)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 220)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 316)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 29)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 125)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 221)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 317)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 30)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 126)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 222)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 318)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 31)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 127)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 223)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 319)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 32)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 128)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 224)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 320)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 33)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 129)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 225)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 321)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 34)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 130)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 226)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 322)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 35)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 131)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 227)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 323)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 36)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 132)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 228)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 324)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 37)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 133)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 229)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 325)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 38)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 134)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 230)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 326)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 39)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 135)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 231)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 327)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 40)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 136)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 232)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 328)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 41)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 137)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 233)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 329)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 42)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 138)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 234)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 330)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 43)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 139)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 235)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 331)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 44)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 140)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 236)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 332)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 45)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 141)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 237)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 333)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 46)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 142)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 238)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 334)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 47)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 143)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 239)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 335)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 384)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 480)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 576)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 672)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 385)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 481)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 577)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 673)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 386)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 482)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 578)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 674)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 387)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 483)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 579)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 675)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 388)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 484)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 580)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 676)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 389)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 485)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 581)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 677)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 390)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 486)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 582)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 678)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 391)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 487)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 583)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 679)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 392)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 488)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 584)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 680)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 393)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 489)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 585)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 681)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 394)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 490)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 586)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 682)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 395)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 491)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 587)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 683)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 396)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 492)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 588)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 684)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 397)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 493)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 589)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 685)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 398)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 494)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 590)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 686)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 399)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 495)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 591)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 687)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 400)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 496)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 592)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 688)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 401)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 497)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 593)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 689)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 402)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 498)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 594)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 690)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 403)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 499)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 595)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 691)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 404)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 500)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 596)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 692)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 405)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 501)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 597)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 693)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 406)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 502)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 598)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 694)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 407)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 503)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 599)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 695)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 408)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 504)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 600)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 696)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 409)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 505)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 601)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 697)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 410)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 506)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 602)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 698)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 411)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 507)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 603)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 699)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 412)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 508)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 604)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 700)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 413)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 509)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 605)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 701)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 414)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 510)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 606)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 702)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 415)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 511)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 607)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 703)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 416)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 512)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 608)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 704)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 417)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 513)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 609)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 705)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 418)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 514)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 610)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 706)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 419)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 515)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 611)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 707)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 420)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 516)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 612)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 708)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 421)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 517)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 613)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 709)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 422)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 518)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 614)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 710)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 423)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 519)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 615)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 711)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 424)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 520)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 616)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 712)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 425)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 521)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 617)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 713)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 426)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 522)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 618)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 714)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 427)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 523)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 619)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 715)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 428)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 524)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 620)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 716)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 429)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 525)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 621)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 717)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 430)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 526)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 622)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 718)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 431)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 527)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 623)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 719)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 768)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 864)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 960)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1056)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 769)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 865)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 961)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1057)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 770)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 866)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 962)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1058)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 771)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 867)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 963)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1059)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 772)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 868)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 964)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1060)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 773)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 869)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 965)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1061)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 774)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 870)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 966)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1062)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 775)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 871)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 967)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1063)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 776)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 872)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 968)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1064)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 777)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 873)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 969)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1065)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 778)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 874)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 970)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1066)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 779)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 875)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 971)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1067)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 780)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 876)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 972)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1068)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 781)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 877)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 973)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1069)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 782)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 878)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 974)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1070)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 783)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 879)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 975)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1071)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 784)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 880)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 976)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1072)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 785)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 881)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 977)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1073)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 786)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 882)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 978)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1074)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 787)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 883)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 979)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1075)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 788)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 884)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 980)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1076)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 789)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 885)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 981)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1077)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 790)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 886)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 982)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1078)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 791)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 887)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 983)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1079)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 792)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 888)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 984)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1080)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 793)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 889)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 985)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1081)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 794)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 890)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 986)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1082)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 795)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 891)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 987)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1083)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 796)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 892)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 988)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1084)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 797)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 893)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 989)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1085)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 798)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 894)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 990)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1086)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 799)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 895)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 991)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1087)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 800)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 896)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 992)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1088)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 801)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 897)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 993)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1089)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 802)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 898)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 994)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1090)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 803)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 899)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 995)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1091)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 804)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 900)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 996)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1092)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 805)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 901)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 997)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1093)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 806)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 902)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 998)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1094)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 807)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 903)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 999)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1095)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 808)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 904)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1000)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1096)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 809)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 905)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1001)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1097)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 810)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 906)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1002)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1098)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 811)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 907)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1003)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1099)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 812)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 908)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1004)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1100)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 813)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 909)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1005)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1101)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 814)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 910)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1006)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1102)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 815)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 911)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1007)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1103)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1152)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1248)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1344)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1440)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1153)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1249)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1345)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1441)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1154)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1250)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1346)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1442)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1155)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1251)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1347)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1443)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1156)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1252)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1348)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1444)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1157)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1253)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1349)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1445)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1158)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1254)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1350)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1446)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1159)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1255)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1351)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1447)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1160)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1256)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1352)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1448)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1161)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1257)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1353)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1449)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1162)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1258)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1354)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1450)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1163)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1259)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1355)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1451)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1164)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1260)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1356)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1452)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1165)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1261)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1357)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1453)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1166)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1262)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1358)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1454)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1167)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1263)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1359)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1455)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1168)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1264)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1360)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1456)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1169)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1265)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1361)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1457)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1170)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1266)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1362)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1458)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1171)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1267)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1363)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1459)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1172)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1268)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1364)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1460)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1173)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1269)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1365)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1461)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1174)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1270)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1366)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1462)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1175)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1271)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1367)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1463)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1176)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1272)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1368)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1464)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1177)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1273)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1369)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1465)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1178)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1274)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1370)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1466)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1179)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1275)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1371)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1467)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1180)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1276)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1372)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1468)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1181)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1277)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1373)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1469)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1182)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1278)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1374)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1470)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1183)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1279)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1375)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1471)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1184)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1280)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1376)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1472)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1185)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1281)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1377)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1473)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1186)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1282)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1378)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1474)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1187)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1283)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1379)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1475)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1188)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1284)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1380)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1476)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1189)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1285)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1381)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1477)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1190)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1286)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1382)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1478)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1191)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1287)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1383)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1479)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1192)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1288)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1384)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1480)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1193)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1289)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1385)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1481)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1194)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1290)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1386)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1482)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1195)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1291)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1387)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1483)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1196)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1292)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1388)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1484)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1197)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1293)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1389)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1485)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1198)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1294)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1390)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1486)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1199)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1295)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1391)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1487)]))
+ for (ry.outer.outer: int32, 0, 3) {
+ for (rx.outer.outer: int32, 0, 3) {
+ let cse_var_2: int32 = (rc.outer.outer*288)
+ let cse_var_1: int32 = (ry.outer.outer*3)
+ {
+ for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 28) {
+ let cse_var_3: int32 = (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56)
+ attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ pad_temp.shared_1: Buffer(pad_temp.shared, float32, [1568], [], scope="shared")[(cse_var_3 + threadIdx.x_1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7))) && ((ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7)) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x [...]
+ }
+ attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1: Buffer(kernel.shared, float32, [512], [], scope="shared")[threadIdx.x_2] = kernel[((((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 32)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 7), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 14), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 21), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 32256)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 35), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 42), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 49), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 64512)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
+ kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((((blockIdx.x*73728) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer) + 69120)]
+ }
+ for (rc.outer.inner: int32, 0, 16) {
+ for (ff.outer.inner: int32, 0, 2) {
+ let cse_var_10: int32 = (ff.outer.inner*7)
+ let cse_var_9: int32 = (cse_var_10 + 6)
+ let cse_var_8: int32 = (cse_var_10 + 5)
+ let cse_var_7: int32 = (cse_var_10 + 4)
+ let cse_var_6: int32 = (cse_var_10 + 3)
+ let cse_var_5: int32 = (cse_var_10 + 2)
+ let cse_var_4: int32 = (cse_var_10 + 1)
+ {
+ conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 49)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 50)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 51)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 52)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 53)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 54)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 55)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ }
+ }
}
}
}
}
}
- for (i1.inner: int32, 0, 16) {
- compute[(((blockIdx.x*784) + (i1.inner*49)) + threadIdx.x)] = max((conv2d_nchw_1[i1.inner] + bias[((blockIdx.x*16) + i1.inner)]), 0f32)
+ for (i1.inner: int32, 0, 2) {
+ for (i3.inner: int32, 0, 7) {
+ compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+ }
}
}
}
@@ -1230,7 +361,7 @@ We build the binary and check its correctness and performance.
.. code-block:: none
- Execution time of this operator: 0.229 ms
+ Execution time of this operator: 0.330 ms
@@ -1274,21 +405,21 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
- conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=4)
- conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
- conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=1)
+ conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+ conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+ conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
- conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+ conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
- conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+ conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
- conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=16)
- conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
- conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+ conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+ conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
+ conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
@@ -1296,14 +427,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
- compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=16)
- compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=1)
+ compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+ compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
- compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
- compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+ compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+ compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1323,14 +454,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
- kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+ kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
- pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+ pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
- s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 1024)
+ s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 16)
s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
CUDA source code:
@@ -1348,10 +479,10 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
#define int64_t long long
#define uint64_t unsigned long long
#endif
- extern "C" __global__ void __launch_bounds__(49) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
- float conv2d_nchw[16];
- __shared__ float pad_temp_shared[2016];
- __shared__ float kernel_shared[1536];
+ extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+ float conv2d_nchw[14];
+ __shared__ float pad_temp_shared[1568];
+ __shared__ float kernel_shared[512];
conv2d_nchw[0] = 0.000000e+00f;
conv2d_nchw[1] = 0.000000e+00f;
conv2d_nchw[2] = 0.000000e+00f;
@@ -1366,864 +497,51 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
conv2d_nchw[11] = 0.000000e+00f;
conv2d_nchw[12] = 0.000000e+00f;
conv2d_nchw[13] = 0.000000e+00f;
- conv2d_nchw[14] = 0.000000e+00f;
- conv2d_nchw[15] = 0.000000e+00f;
for (int rc_outer_outer = 0; rc_outer_outer < 16; ++rc_outer_outer) {
- for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
- __syncthreads();
- pad_temp_shared[((int)threadIdx.x)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 49)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 49) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 98) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 147)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 147) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 196) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 245)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 245) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 294) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 343)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 343) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 392)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 392) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 441)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 335)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 490) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 539)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 539) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 588) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 637)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 637) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 686) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 735)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 735) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 784) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 833)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 833) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 882)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 678)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 931)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 931) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 980) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1029)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1029) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1078) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1127)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1127) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1176) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1225)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1225) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1274)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1274) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1323)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1021)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1372) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1421)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1421) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1470) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1519)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1519) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1568) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1617)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1617) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1666) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1715)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1715) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1764)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1364)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1813)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1813) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1862) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1911)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1911) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1960) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- if (((int)threadIdx.x) < 7) {
- pad_temp_shared[(((int)threadIdx.x) + 2009)] = 0.000000e+00f;
- }
- kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 49)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 49) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 49) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 98)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 98) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 2) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 147)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 147) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 51) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 196)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 196) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 4) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 245)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 245) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 53) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 294)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 294) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 6) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 343)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 343) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 55) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 392)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 8) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 441)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 441) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 57) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 490)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 490) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 10) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 539)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 539) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 59) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 588)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 588) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 12) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 637)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 637) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 61) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 686)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 686) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 14) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 735)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 735) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 63) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 784)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 16) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 833)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 833) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 65) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 882)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 882) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 18) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 931)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 931) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 67) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 980)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 980) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 20) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1029)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1029) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 69) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1078)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1078) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 22) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1127)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1127) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 71) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1176) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 24) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1225)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1225) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 73) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1274)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1274) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 26) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1323)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1323) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 75) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1372)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1372) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 28) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1421)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1421) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 77) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1470)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1470) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 30) * 3)) + rx_outer_outer)];
- if (((int)threadIdx.x) < 17) {
- kernel_shared[(((int)threadIdx.x) + 1519)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1519) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 79) * 3)) + rx_outer_outer)];
- }
- __syncthreads();
- for (int rc_outer_inner = 0; rc_outer_inner < 2; ++rc_outer_inner) {
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[(rc_outer_inner * 48)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 96)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 192)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 288)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 97)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 193)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 289)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 2)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 98)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 194)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 290)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 3)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 99)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 195)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 291)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 4)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 100)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 196)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 292)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 5)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 101)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 197)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 293)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 6)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 102)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 198)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 294)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 7)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 103)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 199)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 295)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 8)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 104)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 200)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 296)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 9)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 105)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 201)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 297)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 10)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 106)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 202)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 298)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 11)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 107)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 203)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 299)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 12)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 108)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 204)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 300)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 13)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 109)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 205)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 301)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 14)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 110)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 206)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 302)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 15)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 111)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 207)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 303)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 16)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 112)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 208)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 304)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 17)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 113)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 209)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 305)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 18)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 114)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 210)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 306)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 19)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 115)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 211)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 307)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 20)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 116)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 212)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 308)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 21)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 117)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 213)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 309)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 22)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 118)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 214)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 310)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 23)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 119)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 215)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 311)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 24)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 120)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 216)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 312)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 25)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 121)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 217)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 313)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 26)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 122)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 218)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 314)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 27)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 123)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 219)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 315)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 28)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 124)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 220)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 316)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 29)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 125)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 221)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 317)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 30)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 126)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 222)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 318)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 31)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 127)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 223)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 319)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 32)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 128)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 224)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 320)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 33)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 129)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 225)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 321)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 34)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 130)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 226)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 322)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 35)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 131)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 227)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 323)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 36)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 132)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 228)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 324)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 37)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 133)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 229)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 325)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 38)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 134)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 230)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 326)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 39)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 135)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 231)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 327)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 40)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 136)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 232)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 328)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 41)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 137)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 233)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 329)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 42)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 138)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 234)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 330)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 43)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 139)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 235)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 331)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 44)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 140)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 236)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 332)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 45)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 141)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 237)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 333)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 46)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 142)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 238)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 334)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 47)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 143)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 239)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 335)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 384)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 480)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 576)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 672)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 385)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 481)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 577)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 673)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 386)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 482)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 578)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 674)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 387)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 483)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 579)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 675)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 388)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 484)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 580)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 676)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 389)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 485)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 581)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 677)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 390)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 486)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 582)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 678)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 391)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 487)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 583)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 679)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 392)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 488)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 584)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 680)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 393)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 489)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 585)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 681)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 394)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 490)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 586)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 682)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 395)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 491)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 587)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 683)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 396)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 492)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 588)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 684)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 397)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 493)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 589)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 685)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 398)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 494)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 590)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 686)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 399)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 495)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 591)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 687)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 400)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 496)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 592)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 688)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 401)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 497)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 593)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 689)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 402)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 498)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 594)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 690)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 403)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 499)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 595)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 691)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 404)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 500)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 596)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 692)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 405)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 501)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 597)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 693)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 406)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 502)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 598)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 694)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 407)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 503)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 599)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 695)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 408)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 504)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 600)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 696)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 409)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 505)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 601)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 697)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 410)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 506)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 602)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 698)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 411)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 507)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 603)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 699)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 412)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 508)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 604)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 700)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 413)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 509)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 605)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 701)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 414)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 510)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 606)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 702)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 415)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 511)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 607)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 703)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 416)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 512)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 608)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 704)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 417)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 513)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 609)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 705)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 418)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 514)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 610)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 706)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 419)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 515)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 611)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 707)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 420)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 516)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 612)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 708)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 421)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 517)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 613)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 709)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 422)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 518)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 614)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 710)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 423)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 519)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 615)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 711)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 424)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 520)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 616)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 712)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 425)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 521)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 617)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 713)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 426)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 522)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 618)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 714)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 427)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 523)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 619)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 715)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 428)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 524)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 620)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 716)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 429)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 525)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 621)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 717)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 430)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 526)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 622)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 718)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 431)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 527)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 623)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 719)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 768)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 864)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 960)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1056)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 769)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 865)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 961)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1057)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 770)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 866)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 962)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1058)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 771)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 867)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 963)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1059)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 772)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 868)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 964)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1060)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 773)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 869)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 965)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1061)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 774)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 870)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 966)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1062)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 775)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 871)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 967)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1063)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 776)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 872)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 968)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1064)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 777)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 873)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 969)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1065)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 778)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 874)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 970)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1066)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 779)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 875)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 971)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1067)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 780)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 876)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 972)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1068)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 781)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 877)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 973)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1069)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 782)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 878)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 974)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1070)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 783)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 879)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 975)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1071)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 784)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 880)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 976)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1072)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 785)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 881)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 977)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1073)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 786)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 882)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 978)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1074)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 787)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 883)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 979)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1075)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 788)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 884)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 980)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1076)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 789)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 885)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 981)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1077)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 790)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 886)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 982)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1078)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 791)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 887)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 983)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1079)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 792)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 888)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 984)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1080)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 793)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 889)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 985)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1081)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 794)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 890)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 986)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1082)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 795)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 891)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 987)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1083)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 796)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 892)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 988)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1084)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 797)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 893)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 989)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1085)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 798)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 894)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 990)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1086)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 799)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 895)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 991)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1087)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 800)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 896)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 992)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1088)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 801)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 897)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 993)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1089)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 802)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 898)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 994)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1090)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 803)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 899)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 995)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1091)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 804)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 900)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 996)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1092)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 805)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 901)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 997)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1093)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 806)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 902)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 998)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1094)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 807)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 903)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 999)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1095)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 808)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 904)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1000)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1096)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 809)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 905)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1001)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1097)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 810)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 906)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1002)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1098)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 811)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 907)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1003)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1099)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 812)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 908)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1004)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1100)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 813)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 909)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1005)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1101)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 814)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 910)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1006)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1102)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 815)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 911)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1007)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1103)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1152)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1248)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1344)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1440)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1153)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1249)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1345)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1441)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1154)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1250)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1346)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1442)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1155)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1251)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1347)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1443)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1156)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1252)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1348)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1444)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1157)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1253)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1349)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1445)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1158)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1254)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1350)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1446)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1159)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1255)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1351)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1447)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1160)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1256)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1352)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1448)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1161)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1257)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1353)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1449)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1162)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1258)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1354)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1450)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1163)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1259)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1355)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1451)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1164)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1260)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1356)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1452)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1165)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1261)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1357)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1453)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1166)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1262)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1358)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1454)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1167)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1263)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1359)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1455)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1168)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1264)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1360)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1456)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1169)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1265)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1361)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1457)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1170)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1266)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1362)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1458)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1171)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1267)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1363)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1459)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1172)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1268)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1364)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1460)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1173)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1269)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1365)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1461)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1174)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1270)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1366)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1462)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1175)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1271)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1367)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1463)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1176)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1272)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1368)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1464)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1177)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1273)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1369)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1465)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1178)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1274)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1370)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1466)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1179)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1275)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1371)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1467)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1180)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1276)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1372)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1468)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1181)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1277)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1373)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 1469)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1182)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1278)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1374)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 1470)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1183)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1279)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1375)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 1471)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1184)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1280)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1376)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 1472)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1185)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1281)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1377)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 1473)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1186)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1282)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1378)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 1474)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1187)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1283)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1379)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 1475)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1188)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1284)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1380)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 1476)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1189)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1285)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1381)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 1477)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1190)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1286)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1382)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 1478)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1191)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1287)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1383)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 1479)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1192)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1288)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1384)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 1480)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1193)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1289)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1385)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 1481)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1194)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1290)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1386)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 1482)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1195)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1291)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1387)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 1483)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1196)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1292)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1388)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 1484)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1197)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1293)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1389)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 1485)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1198)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1294)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1390)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 1486)]));
- conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1199)]));
- conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1295)]));
- conv2d_nchw[14] = (conv2d_nchw[14] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1391)]));
- conv2d_nchw[15] = (conv2d_nchw[15] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 1487)]));
+ for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
+ for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
+ __syncthreads();
+ for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer < 28; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) {
+ pad_temp_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 56) + ((int)threadIdx.x))] = (((((1 <= (ry_outer_outer + (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 8) + (((int)threadIdx.x) / 7)) % 7))) && ((ry_outer_outer + (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 8) + (((int)threadIdx.x) / 7)) % 7)) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (ax [...]
+ }
+ kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) >> 5) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 56)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 56) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 24) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 112) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 168)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 168) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 224)] = kernel[(((((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) >> 5) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer) + 32256)];
+ kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 280) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 24) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 336) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) >> 5) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer)];
+ kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) >> 5) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) & 31) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer) + 64512)];
+ if (((int)threadIdx.x) < 8) {
+ kernel_shared[(((int)threadIdx.x) + 504)] = kernel[((((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 24) * 9)) + (ry_outer_outer * 3)) + rx_outer_outer) + 69120)];
+ }
+ __syncthreads();
+ for (int rc_outer_inner = 0; rc_outer_inner < 16; ++rc_outer_inner) {
+ for (int ff_outer_inner = 0; ff_outer_inner < 2; ++ff_outer_inner) {
+ conv2d_nchw[(ff_outer_inner * 7)] = (conv2d_nchw[(ff_outer_inner * 7)] + (pad_temp_shared[((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7))] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[((ff_outer_inner * 7) + 1)] = (conv2d_nchw[((ff_outer_inner * 7) + 1)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[((ff_outer_inner * 7) + 2)] = (conv2d_nchw[((ff_outer_inner * 7) + 2)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[((ff_outer_inner * 7) + 3)] = (conv2d_nchw[((ff_outer_inner * 7) + 3)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[((ff_outer_inner * 7) + 4)] = (conv2d_nchw[((ff_outer_inner * 7) + 4)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[((ff_outer_inner * 7) + 5)] = (conv2d_nchw[((ff_outer_inner * 7) + 5)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[((ff_outer_inner * 7) + 6)] = (conv2d_nchw[((ff_outer_inner * 7) + 6)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2))]));
+ conv2d_nchw[(ff_outer_inner * 7)] = (conv2d_nchw[(ff_outer_inner * 7)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 49)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ conv2d_nchw[((ff_outer_inner * 7) + 1)] = (conv2d_nchw[((ff_outer_inner * 7) + 1)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 50)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ conv2d_nchw[((ff_outer_inner * 7) + 2)] = (conv2d_nchw[((ff_outer_inner * 7) + 2)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 51)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ conv2d_nchw[((ff_outer_inner * 7) + 3)] = (conv2d_nchw[((ff_outer_inner * 7) + 3)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 52)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ conv2d_nchw[((ff_outer_inner * 7) + 4)] = (conv2d_nchw[((ff_outer_inner * 7) + 4)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 53)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ conv2d_nchw[((ff_outer_inner * 7) + 5)] = (conv2d_nchw[((ff_outer_inner * 7) + 5)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 54)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ conv2d_nchw[((ff_outer_inner * 7) + 6)] = (conv2d_nchw[((ff_outer_inner * 7) + 6)] + (pad_temp_shared[(((rc_outer_inner * 98) + ((((int)threadIdx.x) % 7) * 7)) + 55)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 64) + (ff_outer_inner * 32)) + (rc_outer_inner * 2)) + 1)]));
+ }
+ }
}
}
}
- for (int i1_inner = 0; i1_inner < 16; ++i1_inner) {
- compute[(((((int)blockIdx.x) * 784) + (i1_inner * 49)) + ((int)threadIdx.x))] = max((conv2d_nchw[i1_inner] + bias[((((int)blockIdx.x) * 16) + i1_inner)]), 0.000000e+00f);
+ for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
+ for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
+ compute[(((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 98)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 16) + ((((int)threadIdx.x) / 7) * 2)) + i1_inner)]), 0.000000e+00f);
+ }
}
}
@@ -2282,7 +600,7 @@ In the example below we resume the status and do more 5 trials.
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 2 minutes 28.084 seconds)
+ **Total running time of the script:** ( 2 minutes 19.891 seconds)
.. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index 144ebb904..a3e6315d4 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -614,7 +614,7 @@ so we can read the log file and load the best schedules.
Evaluate inference time cost...
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 9.6952 9.7011 9.7414 9.6430 0.0404
+ 9.7473 9.7540 9.7861 9.7017 0.0348
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index 970decefb..7a6804ca8 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -633,7 +633,7 @@ so we can read the log file and load the best schedules.
Evaluate inference time cost...
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 771.7962 773.1405 776.8791 765.3691 4.7941
+ 746.9453 746.1865 751.6476 743.0019 3.5701
@@ -658,7 +658,7 @@ Other Tips
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 1 minutes 18.796 seconds)
+ **Total running time of the script:** ( 1 minutes 17.690 seconds)
.. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index cdb878c29..66b904b6c 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -362,119 +362,75 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
- preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_16: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_9: placeholder_18: Buffer(placeholder_14, float32, [128, 512], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
- for (i0.outer.i1.outer.fused: int32, 0, 256) "parallel" {
- allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
- for (i.outer.inner: int32, 0, 8) {
- let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32)
- let cse_var_1: int32 = (i.outer.inner*32)
- {
- compute_5: Buffer(compute_4, float32, [256], [])[cse_var_1] = 0f32
- compute_5[(cse_var_1 + 1)] = 0f32
- compute_5[(cse_var_1 + 2)] = 0f32
- compute_5[(cse_var_1 + 3)] = 0f32
- compute_5[(cse_var_1 + 4)] = 0f32
- compute_5[(cse_var_1 + 5)] = 0f32
- compute_5[(cse_var_1 + 6)] = 0f32
- compute_5[(cse_var_1 + 7)] = 0f32
- compute_5[(cse_var_1 + 8)] = 0f32
- compute_5[(cse_var_1 + 9)] = 0f32
- compute_5[(cse_var_1 + 10)] = 0f32
- compute_5[(cse_var_1 + 11)] = 0f32
- compute_5[(cse_var_1 + 12)] = 0f32
- compute_5[(cse_var_1 + 13)] = 0f32
- compute_5[(cse_var_1 + 14)] = 0f32
- compute_5[(cse_var_1 + 15)] = 0f32
- compute_5[(cse_var_1 + 16)] = 0f32
- compute_5[(cse_var_1 + 17)] = 0f32
- compute_5[(cse_var_1 + 18)] = 0f32
- compute_5[(cse_var_1 + 19)] = 0f32
- compute_5[(cse_var_1 + 20)] = 0f32
- compute_5[(cse_var_1 + 21)] = 0f32
- compute_5[(cse_var_1 + 22)] = 0f32
- compute_5[(cse_var_1 + 23)] = 0f32
- compute_5[(cse_var_1 + 24)] = 0f32
- compute_5[(cse_var_1 + 25)] = 0f32
- compute_5[(cse_var_1 + 26)] = 0f32
- compute_5[(cse_var_1 + 27)] = 0f32
- compute_5[(cse_var_1 + 28)] = 0f32
- compute_5[(cse_var_1 + 29)] = 0f32
- compute_5[(cse_var_1 + 30)] = 0f32
- compute_5[(cse_var_1 + 31)] = 0f32
- for (elem_idx: int32, 0, (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
- let cse_var_35: int32 = (cse_var_1 + 1)
- let cse_var_34: int32 = (cse_var_1 + 10)
- let cse_var_33: int32 = (cse_var_1 + 11)
- let cse_var_32: int32 = (cse_var_1 + 12)
- let cse_var_31: int32 = (cse_var_1 + 13)
- let cse_var_30: int32 = (cse_var_1 + 14)
- let cse_var_29: int32 = (cse_var_1 + 15)
- let cse_var_28: int32 = (cse_var_1 + 16)
- let cse_var_27: int32 = (cse_var_1 + 17)
- let cse_var_26: int32 = (cse_var_1 + 18)
- let cse_var_25: int32 = (cse_var_1 + 19)
- let cse_var_24: int32 = (cse_var_1 + 2)
- let cse_var_23: int32 = (cse_var_1 + 20)
- let cse_var_22: int32 = (cse_var_1 + 21)
- let cse_var_21: int32 = (cse_var_1 + 22)
- let cse_var_20: int32 = (cse_var_1 + 24)
+ preflattened_buffer_map = {placeholder_6: placeholder_15: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_17: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_18: Buffer(placeholder_13, int32, [33], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], [])} {
+ for (i0.outer.i1.outer.fused: int32, 0, 32) "parallel" {
+ allocate(compute_4: Pointer(global float32), float32, [2048]), storage_scope = global {
+ for (i.outer.inner: int32, 0, 2) {
+ for (i.inner.init: int32, 0, 64) {
+ let cse_var_1: int32 = ((i.outer.inner*1024) + (i.inner.init*16))
+ {
+ compute_5: Buffer(compute_4, float32, [2048], [])[cse_var_1] = 0f32
+ compute_5[(cse_var_1 + 1)] = 0f32
+ compute_5[(cse_var_1 + 2)] = 0f32
+ compute_5[(cse_var_1 + 3)] = 0f32
+ compute_5[(cse_var_1 + 4)] = 0f32
+ compute_5[(cse_var_1 + 5)] = 0f32
+ compute_5[(cse_var_1 + 6)] = 0f32
+ compute_5[(cse_var_1 + 7)] = 0f32
+ compute_5[(cse_var_1 + 8)] = 0f32
+ compute_5[(cse_var_1 + 9)] = 0f32
+ compute_5[(cse_var_1 + 10)] = 0f32
+ compute_5[(cse_var_1 + 11)] = 0f32
+ compute_5[(cse_var_1 + 12)] = 0f32
+ compute_5[(cse_var_1 + 13)] = 0f32
+ compute_5[(cse_var_1 + 14)] = 0f32
+ compute_5[(cse_var_1 + 15)] = 0f32
+ }
+ }
+ for (elem_idx: int32, 0, (placeholder_3[(i0.outer.i1.outer.fused + 1)] - placeholder_3[i0.outer.i1.outer.fused])) {
+ for (i.inner: int32, 0, 64) {
let cse_var_19: int32 = (elem_idx*16)
- let cse_var_18: int32 = (cse_var_1 + 9)
- let cse_var_17: int32 = (cse_var_1 + 8)
- let cse_var_16: int32 = (cse_var_1 + 7)
- let cse_var_15: int32 = (cse_var_1 + 6)
- let cse_var_14: int32 = (cse_var_1 + 5)
- let cse_var_13: int32 = (cse_var_1 + 4)
- let cse_var_12: int32 = (cse_var_1 + 23)
- let cse_var_11: int32 = (cse_var_1 + 30)
- let cse_var_10: int32 = (cse_var_1 + 3)
- let cse_var_9: int32 = (cse_var_1 + 29)
- let cse_var_8: int32 = (cse_var_1 + 28)
- let cse_var_7: int32 = (cse_var_1 + 27)
- let cse_var_6: int32 = (cse_var_1 + 26)
- let cse_var_5: int32 = (cse_var_1 + 25)
- let cse_var_4: int32 = (cse_var_1 + 31)
- let cse_var_3: int32 = ((floordiv(i0.outer.i1.outer.fused, 32)*4096) + (i.outer.inner*512))
+ let cse_var_18: int32 = ((i.outer.inner*16384) + (i.inner*256))
+ let cse_var_17: int32 = ((i.outer.inner*1024) + (i.inner*16))
+ let cse_var_16: int32 = (cse_var_17 + 1)
+ let cse_var_15: int32 = (cse_var_17 + 11)
+ let cse_var_14: int32 = (cse_var_17 + 12)
+ let cse_var_13: int32 = (cse_var_17 + 13)
+ let cse_var_12: int32 = (cse_var_17 + 14)
+ let cse_var_11: int32 = (cse_var_17 + 15)
+ let cse_var_10: int32 = (cse_var_17 + 2)
+ let cse_var_9: int32 = (cse_var_17 + 3)
+ let cse_var_8: int32 = (cse_var_17 + 4)
+ let cse_var_7: int32 = (cse_var_17 + 5)
+ let cse_var_6: int32 = (cse_var_17 + 6)
+ let cse_var_5: int32 = (cse_var_17 + 7)
+ let cse_var_4: int32 = (cse_var_17 + 8)
+ let cse_var_3: int32 = (cse_var_17 + 9)
+ let cse_var_2: int32 = (cse_var_17 + 10)
{
- compute_5[cse_var_1] = (compute_5[cse_var_1] + (placeholder_1[((placeholder_3[cse_var_2]*16) + cse_var_19)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_35] = (compute_5[cse_var_35] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 1)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_24] = (compute_5[cse_var_24] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 2)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 3)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 4)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 5)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 6)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 7)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 8)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 9)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_34] = (compute_5[cse_var_34] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 10)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_33] = (compute_5[cse_var_33] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 11)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_32] = (compute_5[cse_var_32] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 12)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_31] = (compute_5[cse_var_31] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 13)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_30] = (compute_5[cse_var_30] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 14)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_29] = (compute_5[cse_var_29] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 15)]*max(placeholder[(cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)])], 0f32)))
- compute_5[cse_var_28] = (compute_5[cse_var_28] + (placeholder_1[((placeholder_3[cse_var_2]*16) + cse_var_19)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_27] = (compute_5[cse_var_27] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 1)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_26] = (compute_5[cse_var_26] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 2)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_25] = (compute_5[cse_var_25] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 3)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_23] = (compute_5[cse_var_23] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 4)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_22] = (compute_5[cse_var_22] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 5)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_21] = (compute_5[cse_var_21] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 6)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 7)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 8)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 9)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 10)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 11)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 12)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 13)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 14)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
- compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_2]*16) + cse_var_19) + 15)]*max(placeholder[((cse_var_3 + placeholder_2[(placeholder_3[cse_var_2] + elem_idx)]) + 256)], 0f32)))
+ compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_2] = (compute_5[cse_var_2] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+ compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + cse_var_19) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
}
}
}
}
- for (i0.inner: int32, 0, 16) {
- let cse_var_36: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
- compute[ramp(cse_var_36, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_36, 1, 16)]), broadcast(0f32, 16))
+ for (i0.inner: int32, 0, 128) {
+ let cse_var_20: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*16))
+ compute[ramp(cse_var_20, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_20, 1, 16)]), broadcast(0f32, 16))
}
}
}
@@ -528,7 +484,7 @@ We build the binary and check its correctness and performance.
.. code-block:: none
- Execution time of this operator: 3.549 ms
+ Execution time of this operator: 1.827 ms
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index 3f2f54888..333dc9e5c 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
Computation times
=================
-**00:44.106** total execution time for **how_to_tune_with_autotvm** files:
+**00:44.100** total execution time for **how_to_tune_with_autotvm** files:
-- **00:43.278**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
-- **00:00.215**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
-- **00:00.207**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
-- **00:00.204**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
-- **00:00.203**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:43.294**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
+- **00:00.208**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
+- **00:00.208**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
+- **00:00.195**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
+- **00:00.194**: :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 63cf95880..fd5b0177f 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -859,8 +859,8 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
- No: 6 GFLOPS: 42.30/42.30 result: MeasureResult(costs=(0.005473275105263158,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.56661057472229, timestamp=1652754069.3949578) [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
- No: 7 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 6 GFLOPS: 93.19/93.19 result: MeasureResult(costs=(0.0024842372291666664,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7560157775878906, timestamp=1652782145.6015983) [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+ No: 7 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -983,7 +983,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
- No: 8 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 8 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1106,7 +1106,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
- No: 9 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 9 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1229,7 +1229,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
- No: 10 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 10 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
res = future.result()
File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1247,7 +1247,7 @@ for this template
TimeoutError
[('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
- No: 11 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 11 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1370,7 +1370,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
- No: 12 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 12 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1493,7 +1493,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
- No: 13 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 13 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1616,7 +1616,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
- No: 14 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 14 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1739,7 +1739,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
- No: 15 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 15 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1862,7 +1862,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
- No: 16 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 16 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -1985,7 +1985,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
- No: 17 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 17 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -2108,7 +2108,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
- No: 18 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 18 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 571, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 523, in _build_func_common
@@ -2231,7 +2231,7 @@ for this template
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 854, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
- No: 19 GFLOPS: 0.00/42.30 result: Traceback (most recent call last):
+ No: 19 GFLOPS: 0.00/93.19 result: Traceback (most recent call last):
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 721, in __call__
yield remote, remote.load_module(os.path.split(build_result.filename)[1])
File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 685, in run_through_rpc
@@ -2319,7 +2319,7 @@ for this template
15: _PyEval_EvalFrameDefault
14: 0x0000000000537c30
13: _PyObject_FastCallKeywords
- 12: 0x00007f3d1d202fa2
+ 12: 0x00007f5b9a242fa2
11: _ctypes_callproc
10: ffi_call
9: ffi_call_unix64
@@ -2384,7 +2384,7 @@ for this template
21: _PyFunction_FastCallKeywords
20: _PyEval_EvalFrameDefault
19: _PyFunction_FastCall [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
- No: 20 GFLOPS: 143.71/143.71 result: MeasureResult(costs=(0.00161091818,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4025070667266846, timestamp=1652754095.8003697) [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+ No: 20 GFLOPS: 144.66/144.66 result: MeasureResult(costs=(0.00160031061,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3949148654937744, timestamp=1652782171.9501283) [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
@@ -2437,7 +2437,7 @@ and measure running time.
Best config:
[('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
- Time cost of this operator: 0.002074
+ Time cost of this operator: 0.001957
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index d19cd1ccb..34a165576 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -292,10 +292,10 @@ Timing the untuned program
########## Build without Autotuning ##########
Node Name Ops Time(us) Time(%) Shape Inputs Outputs
--------- --- -------- ------- ----- ------ -------
- tvmgen_default_fused_nn_contrib_conv2d_NCHWc tvmgen_default_fused_nn_contrib_conv2d_NCHWc 309.3 98.755 (1, 2, 10, 10, 3) 2 1
- tvmgen_default_fused_layout_transform_1 tvmgen_default_fused_layout_transform_1 3.0 0.958 (1, 6, 10, 10) 1 1
- tvmgen_default_fused_layout_transform tvmgen_default_fused_layout_transform 0.901 0.288 (1, 1, 10, 10, 3) 1 1
- Total_time - 313.201 - - - -
+ tvmgen_default_fused_nn_contrib_conv2d_NCHWc tvmgen_default_fused_nn_contrib_conv2d_NCHWc 310.7 98.722 (1, 2, 10, 10, 3) 2 1
+ tvmgen_default_fused_layout_transform_1 tvmgen_default_fused_layout_transform_1 3.121 0.992 (1, 6, 10, 10) 1 1
+ tvmgen_default_fused_layout_transform tvmgen_default_fused_layout_transform 0.901 0.286 (1, 1, 10, 10, 3) 1 1
+ Total_time - 314.722 - - - -
@@ -357,10 +357,10 @@ Timing the tuned program
########## Build with Autotuning ##########
Node Name Ops Time(us) Time(%) Shape Inputs Outputs
--------- --- -------- ------- ----- ------ -------
- tvmgen_default_fused_nn_contrib_conv2d_NCHWc tvmgen_default_fused_nn_contrib_conv2d_NCHWc 197.2 98.6 (1, 1, 10, 10, 6) 2 1
- tvmgen_default_fused_layout_transform_1 tvmgen_default_fused_layout_transform_1 1.9 0.95 (1, 6, 10, 10) 1 1
- tvmgen_default_fused_layout_transform tvmgen_default_fused_layout_transform 0.9 0.45 (1, 3, 10, 10, 1) 1 1
- Total_time - 200.0 - - - -
+ tvmgen_default_fused_nn_contrib_conv2d_NCHWc tvmgen_default_fused_nn_contrib_conv2d_NCHWc 118.5 97.805 (1, 6, 10, 10, 1) 2 1
+ tvmgen_default_fused_layout_transform_1 tvmgen_default_fused_layout_transform_1 1.738 1.434 (1, 6, 10, 10) 1 1
+ tvmgen_default_fused_layout_transform tvmgen_default_fused_layout_transform 0.922 0.761 (1, 1, 10, 10, 3) 1 1
+ Total_time - 121.16 - - - -
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index a9aad1ddf..97e84f92f 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
Computation times
=================
-**00:45.444** total execution time for **how_to_work_with_microtvm** files:
+**00:45.243** total execution time for **how_to_work_with_microtvm** files:
-- **00:41.316**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)
-- **00:03.538**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)
-- **00:00.199**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)
-- **00:00.196**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``)
-- **00:00.195**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tvmc.py` (``micro_tvmc.py``)
+- **00:41.099**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)
+- **00:03.547**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)
+- **00:00.237**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)
+- **00:00.182**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_tvmc.py` (``micro_tvmc.py``)
+- **00:00.178**: :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``)
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 98cd4f2d1..667485ad0 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
Computation times
=================
-**00:08.814** total execution time for **how_to_work_with_relay** files:
+**00:08.838** total execution time for **how_to_work_with_relay** files:
-- **00:06.941**: :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)
-- **00:01.660**: :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)
-- **00:00.213**: :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)
+- **00:06.807**: :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)
+- **00:01.833**: :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)
+- **00:00.199**: :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index d78916e07..93d141838 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
Computation times
=================
-**00:05.609** total execution time for **how_to_work_with_schedules** files:
+**00:05.499** total execution time for **how_to_work_with_schedules** files:
-- **00:02.071**: :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)
-- **00:01.126**: :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)
-- **00:00.719**: :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)
-- **00:00.718**: :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)
-- **00:00.296**: :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)
-- **00:00.239**: :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``)
-- **00:00.225**: :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)
-- **00:00.214**: :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)
+- **00:02.037**: :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)
+- **00:01.136**: :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)
+- **00:00.703**: :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)
+- **00:00.687**: :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)
+- **00:00.288**: :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)
+- **00:00.231**: :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``)
+- **00:00.216**: :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)
+- **00:00.202**: :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 090ecdc47..a56b2ef7c 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -318,7 +318,7 @@ The importing needs to happen before the tensorized GEMV being executed.
C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
buffer_map = {A_1: A, B_1: B, C_1: C}
preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
- attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpl1ncbzhq/input0.cc'\nsource_filename = \"/tmp/tmpl1ncbzhq/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n %7 = alloca float*, align 8\n %8 = alloca float*, align 8\n %9 = alloca floa [...]
+ attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpv01nlxvs/input0.cc'\nsource_filename = \"/tmp/tmpv01nlxvs/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n %7 = alloca float*, align 8\n %8 = alloca float*, align 8\n %9 = alloca floa [...]
for (i, 0, 1024) {
for (j.outer: int32, 0, 32) {
@tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 1ba0a79e0..61d434186 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
Computation times
=================
-**00:19.778** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:20.165** total execution time for **topic_vta_tutorials_autotvm** files:
-- **00:19.592**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
-- **00:00.186**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)
+- **00:19.983**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
+- **00:00.183**: :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 25b5a0998..7144b0f98 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -265,7 +265,7 @@ The compilation steps are:
DeprecationWarning,
/workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the new recommended usage.
relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
- resnet18_v1 inference graph built in 20.80s!
+ resnet18_v1 inference graph built in 20.70s!
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index b864140ce..65d9cfb09 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -301,7 +301,7 @@ The compilation steps are:
/workspace/python/tvm/relay/build_module.py:431: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
DeprecationWarning,
- yolov3-tiny inference graph built in 14.53s!
+ yolov3-tiny inference graph built in 14.58s!
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index 257846bfa..c3c196b45 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
Computation times
=================
-**01:27.391** total execution time for **topic_vta_tutorials_frontend** files:
+**01:27.320** total execution time for **topic_vta_tutorials_frontend** files:
-- **00:46.427**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
-- **00:40.964**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
+- **00:46.491**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
+- **00:40.829**: :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 25b6f1945..80c6cd410 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
Computation times
=================
-**00:03.534** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.460** total execution time for **topic_vta_tutorials_optimize** files:
-- **00:02.997**: :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
-- **00:00.537**: :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
+- **00:02.934**: :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
+- **00:00.526**: :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index 4d7578589..21d8aad3c 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
Computation times
=================
-**00:00.954** total execution time for **topic_vta_tutorials** files:
+**00:00.977** total execution time for **topic_vta_tutorials** files:
-- **00:00.482**: :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
-- **00:00.472**: :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.493**: :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
+- **00:00.484**: :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 69bad7ac9..f70e34f58 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -184,7 +184,7 @@ trials, we can load the best schedule from the log file and apply it.
.. code-block:: none
-
+ *E
@@ -306,7 +306,7 @@ We build the binary and check its correctness and performance.
.. code-block:: none
- Execution time of this operator: 94.705 ms
+ Execution time of this operator: 93.749 ms
@@ -415,6 +415,11 @@ Expression (TE) language that demonstrates how TVM can optimize computational
operations.
+.. rst-class:: sphx-glr-timing
+
+ **Total running time of the script:** ( 1 minutes 11.014 seconds)
+
+
.. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 80a05dc55..8a4e18ec9 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -271,7 +271,7 @@ standard deviation.
.. code-block:: none
- {'mean': 492.0485737900015, 'median': 491.6691768000021, 'std': 0.8640093566493087}
+ {'mean': 490.8931876000008, 'median': 490.8230106499957, 'std': 0.5127112764728715}
@@ -485,31 +485,31 @@ the tuning data to.
.. code-block:: none
-
[Task 1/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 1/25] Current/Best: 17.55/ 17.55 GFLOPS | Progress: (4/20) | 5.86 s
[Task 1/25] Current/Best: 6.17/ 17.55 GFLOPS | Progress: (8/20) | 8.79 s
[Task 1/25] Current/Best: 11.56/ 22.87 GFLOPS | Progress: (12/20) | 11.23 s
[Task 1/25] Current/Best: 16.80/ 22.87 GFLOPS | Progress: (16/20) | 12.90 s
[Task 1/25] Current/Best: 11.64/ 23.94 GFLOPS | Progress: (20/20) | 14.61 s Done.
-
[Task 2/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 2/25] Current/Best: 12.16/ 13.14 GFLOPS | Progress: (4/20) | 3.78 s
[Task 2/25] Current/Best: 13.94/ 18.32 GFLOPS | Progress: (8/20) | 5.05 s
[Task 2/25] Current/Best: 21.19/ 21.19 GFLOPS | Progress: (12/20) | 6.34 s
[Task 2/25] Current/Best: 12.37/ 21.19 GFLOPS | Progress: (16/20) | 7.59 s
[Task 2/25] Current/Best: 19.47/ 21.19 GFLOPS | Progress: (20/20) | 9.19 s Done.
-
[Task 3/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 3/25] Current/Best: 1.63/ 10.54 GFLOPS | Progress: (4/20) | 5.76 s
[Task 3/25] Current/Best: 15.60/ 16.89 GFLOPS | Progress: (8/20) | 7.65 s
[Task 3/25] Current/Best: 14.91/ 16.89 GFLOPS | Progress: (12/20) | 9.34 s
[Task 3/25] Current/Best: 7.17/ 23.80 GFLOPS | Progress: (16/20) | 11.22 s
[Task 3/25] Current/Best: 12.70/ 23.80 GFLOPS | Progress: (20/20) | 15.73 s Done.
-
[Task 4/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 4/25] Current/Best: 9.55/ 20.43 GFLOPS | Progress: (4/20) | 2.27 s
[Task 4/25] Current/Best: 6.51/ 20.43 GFLOPS | Progress: (8/20) | 7.02 s
[Task 4/25] Current/Best: 22.42/ 22.42 GFLOPS | Progress: (12/20) | 11.87 s
[Task 4/25] Current/Best: 16.79/ 22.42 GFLOPS | Progress: (16/20) | 14.27 s
[Task 4/25] Current/Best: 13.38/ 22.42 GFLOPS | Progress: (20/20) | 16.36 s Done.
-
[Task 5/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 5/25] Current/Best: 9.77/ 10.46 GFLOPS | Progress: (4/20) | 2.48 s
[Task 5/25] Current/Best: 11.72/ 12.79 GFLOPS | Progress: (8/20) | 4.52 s
[Task 5/25] Current/Best: 11.79/ 18.08 GFLOPS | Progress: (12/20) | 7.71 s
[Task 5/25] Current/Best: 11.88/ 22.89 GFLOPS | Progress: (16/20) | 9.17 s
[Task 5/25] Current/Best: 12.02/ 22.89 GFLOPS | Progress: (20/20) | 11.03 s Done.
-
[Task 6/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 6/25] Current/Best: 12.20/ 20.81 GFLOPS | Progress: (4/20) | 4.02 s
[Task 6/25] Current/Best: 19.02/ 20.81 GFLOPS | Progress: (8/20) | 5.78 s
[Task 6/25] Current/Best: 13.33/ 20.81 GFLOPS | Progress: (12/20) | 7.71 s
[Task 6/25] Current/Best: 19.91/ 20.81 GFLOPS | Progress: (16/20) | 9.94 s
[Task 6/25] Current/Best: 3.73/ 20.81 GFLOPS | Progress: (20/20) | 12.44 s Done.
-
[Task 7/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 7/25] Current/Best: 11.23/ 12.22 GFLOPS | Progress: (4/20) | 3.51 s
[Task 7/25] Current/Best: 20.27/ 21.02 GFLOPS | Progress: (8/20) | 5.00 s
[Task 7/25] Current/Best: 15.60/ 21.02 GFLOPS | Progress: (12/20) | 6.90 s
[Task 7/25] Current/Best: 12.29/ 21.02 GFLOPS | Progress: (16/20) | 8.93 s
[Task 7/25] Current/Best: 6.38/ 21.77 GFLOPS | Progress: (20/20) | 11.36 s Done.
-
[Task 8/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 8/25] Current/Best: 10.00/ 13.99 GFLOPS | Progress: (4/20) | 2.83 s
[Task 8/25] Current/Best: 9.53/ 13.99 GFLOPS | Progress: (8/20) | 7.90 s
[Task 8/25] Current/Best: 12.47/ 13.99 GFLOPS | Progress: (12/20) | 14.31 s
[Task 8/25] Current/Best: 18.79/ 18.79 GFLOPS | Progress: (16/20) | 16.41 s
[Task 8/25] Current/Best: 19.48/ 19.48 GFLOPS | Progress: (20/20) | 23.48 s Done.
-
[Task 9/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 9/25] Current/Best: 14.40/ 15.45 GFLOPS | Progress: (4/20) | 18.81 s
[Task 9/25] Current/Best: 23.49/ 23.49 GFLOPS | Progress: (8/20) | 20.52 s
[Task 9/25] Current/Best: 8.25/ 23.49 GFLOPS | Progress: (12/20) | 23.06 s
[Task 9/25] Current/Best: 18.02/ 23.49 GFLOPS | Progress: (16/20) | 25.93 s
[Task 9/25] Current/Best: 9.13/ 23.49 GFLOPS | Progress: (20/20) | 34.58 s
[Task 10/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 10/25] Current/Best: 18.09/ 18.09 GFLOPS | Progress: (4/20) | 2.45 s
[Task 10/25] Current/Best: 15.56/ 18.09 GFLOPS | Progress: (8/20) | 4.06 s
[Task 10/25] Current/Best: 12.19/ 18.92 GFLOPS | Progress: (12/20) | 5.59 s
[Task 10/25] Current/Best: 19.14/ 20.46 GFLOPS | Progress: (16/20) | 6.67 s
[Task 10/25] Current/Best: 8.69/ 20.46 GFLOPS | Progress: (20/20
) | 8.19 s Done.
-
[Task 11/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 11/25] Current/Best: 12.33/ 18.08 GFLOPS | Progress: (4/20) | 3.19 s
[Task 11/25] Current/Best: 17.11/ 18.08 GFLOPS | Progress: (8/20) | 6.01 s
[Task 11/25] Current/Best: 18.19/ 18.19 GFLOPS | Progress: (12/20) | 8.03 s
[Task 11/25] Current/Best: 11.86/ 21.22 GFLOPS | Progress: (16/20) | 10.99 s
[Task 11/25] Current/Best: 19.48/ 21.54 GFLOPS | Progress: (20/20) | 13.08 s Done.
-
[Task 12/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 12/25] Current/Best: 7.83/ 17.94 GFLOPS | Progress: (4/20) | 5.70 s
[Task 12/25] Current/Best: 5.26/ 17.94 GFLOPS | Progress: (8/20) | 9.60 s
[Task 12/25] Current/Best: 18.87/ 18.93 GFLOPS | Progress: (12/20) | 11.58 s
[Task 12/25] Current/Best: 15.47/ 18.93 GFLOPS | Progress: (16/20) | 14.52 s
[Task 12/25] Current/Best: 15.16/ 18.93 GFLOPS | Progress: (20/20) | 16.42 s Done.
-
[Task 13/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 13/25] Current/Best: 8.64/ 17.28 GFLOPS | Progress: (4/20) | 3.64 s
[Task 13/25] Current/Best: 15.94/ 21.11 GFLOPS | Progress: (8/20) | 6.26 s
[Task 13/25] Current/Best: 19.57/ 21.67 GFLOPS | Progress: (12/20) | 9.21 s
[Task 13/25] Current/Best: 12.28/ 21.67 GFLOPS | Progress: (16/20) | 12.63 s
[Task 13/25] Current/Best: 17.57/ 21.67 GFLOPS | Progress: (20/20) | 14.99 s Done.
-
[Task 14/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 14/25] Current/Best: 13.62/ 13.62 GFLOPS | Progress: (4/20) | 3.34 s
[Task 14/25] Current/Best: 5.94/ 13.62 GFLOPS | Progress: (8/20) | 5.57 s
[Task 14/25] Current/Best: 18.74/ 19.07 GFLOPS | Progress: (12/20) | 8.24 s
[Task 14/25] Current/Best: 16.41/ 19.07 GFLOPS | Progress: (16/20) | 10.15 s
[Task 14/25] Current/Best: 17.38/ 19.07 GFLOPS | Progress: (20/20) | 11.91 s
[Task 15/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
+
[Task 1/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 1/25] Current/Best: 17.57/ 17.57 GFLOPS | Progress: (4/20) | 5.89 s
[Task 1/25] Current/Best: 6.17/ 17.57 GFLOPS | Progress: (8/20) | 8.79 s
[Task 1/25] Current/Best: 11.53/ 22.85 GFLOPS | Progress: (12/20) | 11.22 s
[Task 1/25] Current/Best: 16.80/ 22.87 GFLOPS | Progress: (16/20) | 12.88 s
[Task 1/25] Current/Best: 11.64/ 23.94 GFLOPS | Progress: (20/20) | 14.58 s Done.
+
[Task 2/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 2/25] Current/Best: 12.33/ 12.89 GFLOPS | Progress: (4/20) | 3.79 s
[Task 2/25] Current/Best: 13.96/ 18.35 GFLOPS | Progress: (8/20) | 5.06 s
[Task 2/25] Current/Best: 21.30/ 21.30 GFLOPS | Progress: (12/20) | 6.38 s
[Task 2/25] Current/Best: 12.68/ 21.30 GFLOPS | Progress: (16/20) | 7.66 s
[Task 2/25] Current/Best: 20.15/ 21.30 GFLOPS | Progress: (20/20) | 9.26 s Done.
+
[Task 3/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 3/25] Current/Best: 1.63/ 10.59 GFLOPS | Progress: (4/20) | 5.74 s
[Task 3/25] Current/Best: 15.56/ 16.75 GFLOPS | Progress: (8/20) | 7.64 s
[Task 3/25] Current/Best: 14.92/ 16.75 GFLOPS | Progress: (12/20) | 9.32 s
[Task 3/25] Current/Best: 7.20/ 23.74 GFLOPS | Progress: (16/20) | 11.23 s
[Task 3/25] Current/Best: 12.11/ 23.74 GFLOPS | Progress: (20/20) | 15.74 s Done.
+
[Task 4/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 4/25] Current/Best: 9.54/ 20.48 GFLOPS | Progress: (4/20) | 2.27 s
[Task 4/25] Current/Best: 6.73/ 20.48 GFLOPS | Progress: (8/20) | 7.00 s
[Task 4/25] Current/Best: 22.44/ 22.44 GFLOPS | Progress: (12/20) | 11.95 s
[Task 4/25] Current/Best: 17.01/ 22.44 GFLOPS | Progress: (16/20) | 14.34 s
[Task 4/25] Current/Best: 13.37/ 22.44 GFLOPS | Progress: (20/20) | 16.30 s Done.
+
[Task 5/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 5/25] Current/Best: 9.60/ 10.37 GFLOPS | Progress: (4/20) | 2.48 s
[Task 5/25] Current/Best: 11.92/ 12.77 GFLOPS | Progress: (8/20) | 4.55 s
[Task 5/25] Current/Best: 11.86/ 18.07 GFLOPS | Progress: (12/20) | 7.70 s
[Task 5/25] Current/Best: 11.96/ 22.69 GFLOPS | Progress: (16/20) | 9.10 s
[Task 5/25] Current/Best: 12.15/ 22.69 GFLOPS | Progress: (20/20) | 10.98 s Done.
+
[Task 6/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 6/25] Current/Best: 12.22/ 20.71 GFLOPS | Progress: (4/20) | 4.03 s
[Task 6/25] Current/Best: 19.06/ 20.71 GFLOPS | Progress: (8/20) | 5.76 s
[Task 6/25] Current/Best: 13.28/ 20.71 GFLOPS | Progress: (12/20) | 7.69 s
[Task 6/25] Current/Best: 19.99/ 20.71 GFLOPS | Progress: (16/20) | 9.94 s
[Task 6/25] Current/Best: 3.74/ 20.71 GFLOPS | Progress: (20/20) | 12.48 s Done.
+
[Task 7/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 7/25] Current/Best: 11.28/ 12.81 GFLOPS | Progress: (4/20) | 3.42 s
[Task 7/25] Current/Best: 20.31/ 21.17 GFLOPS | Progress: (8/20) | 4.90 s
[Task 7/25] Current/Best: 16.18/ 21.17 GFLOPS | Progress: (12/20) | 6.77 s
[Task 7/25] Current/Best: 12.27/ 21.17 GFLOPS | Progress: (16/20) | 8.80 s
[Task 7/25] Current/Best: 6.40/ 21.79 GFLOPS | Progress: (20/20) | 11.23 s Done.
+
[Task 8/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 8/25] Current/Best: 9.86/ 14.05 GFLOPS | Progress: (4/20) | 2.80 s
[Task 8/25] Current/Best: 9.62/ 14.05 GFLOPS | Progress: (8/20) | 7.93 s
[Task 8/25] Current/Best: 12.65/ 14.05 GFLOPS | Progress: (12/20) | 14.40 s
[Task 8/25] Current/Best: 18.84/ 18.84 GFLOPS | Progress: (16/20) | 16.49 s
[Task 8/25] Current/Best: 19.87/ 19.87 GFLOPS | Progress: (20/20) | 23.62 s Done.
+
[Task 9/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 9/25] Current/Best: 14.38/ 15.75 GFLOPS | Progress: (4/20) | 18.75 s
[Task 9/25] Current/Best: 23.53/ 23.53 GFLOPS | Progress: (8/20) | 20.41 s
[Task 9/25] Current/Best: 8.30/ 23.53 GFLOPS | Progress: (12/20) | 22.90 s
[Task 9/25] Current/Best: 17.90/ 23.53 GFLOPS | Progress: (16/20) | 25.64 s
[Task 9/25] Current/Best: 9.07/ 23.53 GFLOPS | Progress: (20/20) | 34.26 s
[Task 10/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 10/25] Current/Best: 18.22/ 18.22 GFLOPS | Progress: (4/20) | 2.46 s
[Task 10/25] Current/Best: 15.48/ 18.22 GFLOPS | Progress: (8/20) | 4.06 s
[Task 10/25] Current/Best: 12.93/ 18.87 GFLOPS | Progress: (12/20) | 5.59 s
[Task 10/25] Current/Best: 19.09/ 20.47 GFLOPS | Progress: (16/20) | 6.68 s
[Task 10/25] Current/Best: 8.80/ 20.47 GFLOPS | Progress: (20/20
) | 8.22 s Done.
+
[Task 11/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 11/25] Current/Best: 12.23/ 18.11 GFLOPS | Progress: (4/20) | 3.24 s
[Task 11/25] Current/Best: 16.79/ 18.11 GFLOPS | Progress: (8/20) | 6.04 s
[Task 11/25] Current/Best: 18.11/ 18.11 GFLOPS | Progress: (12/20) | 8.05 s
[Task 11/25] Current/Best: 13.43/ 21.21 GFLOPS | Progress: (16/20) | 10.97 s
[Task 11/25] Current/Best: 19.56/ 21.65 GFLOPS | Progress: (20/20) | 13.06 s Done.
+
[Task 12/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 12/25] Current/Best: 7.84/ 18.04 GFLOPS | Progress: (4/20) | 5.67 s
[Task 12/25] Current/Best: 5.29/ 18.04 GFLOPS | Progress: (8/20) | 9.57 s
[Task 12/25] Current/Best: 18.85/ 18.87 GFLOPS | Progress: (12/20) | 11.54 s
[Task 12/25] Current/Best: 15.52/ 18.87 GFLOPS | Progress: (16/20) | 14.47 s
[Task 12/25] Current/Best: 15.15/ 18.87 GFLOPS | Progress: (20/20) | 16.41 s Done.
+
[Task 13/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 13/25] Current/Best: 8.65/ 17.24 GFLOPS | Progress: (4/20) | 3.62 s
[Task 13/25] Current/Best: 16.14/ 21.13 GFLOPS | Progress: (8/20) | 6.19 s
[Task 13/25] Current/Best: 19.60/ 21.71 GFLOPS | Progress: (12/20) | 9.18 s
[Task 13/25] Current/Best: 12.30/ 21.71 GFLOPS | Progress: (16/20) | 12.58 s
[Task 13/25] Current/Best: 17.64/ 21.71 GFLOPS | Progress: (20/20) | 14.91 s Done.
+
[Task 14/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 14/25] Current/Best: 13.37/ 13.37 GFLOPS | Progress: (4/20) | 3.22 s
[Task 14/25] Current/Best: 6.10/ 13.37 GFLOPS | Progress: (8/20) | 5.42 s
[Task 14/25] Current/Best: 21.09/ 21.09 GFLOPS | Progress: (12/20) | 8.05 s
[Task 14/25] Current/Best: 16.82/ 21.09 GFLOPS | Progress: (16/20) | 9.92 s
[Task 14/25] Current/Best: 17.34/ 21.09 GFLOPS | Progress: (20/20) | 11.68 s
[Task 15/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
Done.
-
[Task 15/25] Current/Best: 16.11/ 17.57 GFLOPS | Progress: (4/20) | 2.54 s
[Task 15/25] Current/Best: 14.32/ 18.06 GFLOPS | Progress: (8/20) | 4.00 s
[Task 15/25] Current/Best: 10.41/ 21.99 GFLOPS | Progress: (12/20) | 6.30 s
[Task 15/25] Current/Best: 20.43/ 21.99 GFLOPS | Progress: (16/20) | 9.43 s
[Task 15/25] Current/Best: 9.66/ 21.99 GFLOPS | Progress: (20/20) | 10.61 s
[Task 16/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 16/25] Current/Best: 20.41/ 20.41 GFLOPS | Progress: (4/20) | 2.79 s
[Task 16/25] Current/Best: 3.04/ 20.41 GFLOPS | Progress: (8/20) | 4.38 s
[Task 16/25] Current/Best: 19.49/ 20.41 GFLOPS | Progress: (12/20) | 5.57 s
[Task 16/25] Current/Best: 17.59/ 20.41 GFLOPS | Progress: (16/20) | 6.93 s
[Task 16/25] Current/Best: 10.04/ 22.42 GFLOPS | Progress: (20/20) | 9.08 s Done.
-
[Task 17/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 17/25] Current/Best: 12.89/ 18.90 GFLOPS | Progress: (4/20) | 4.72 s
[Task 17/25] Current/Best: 14.52/ 23.44 GFLOPS | Progress: (8/20) | 7.49 s
[Task 17/25] Current/Best: 17.26/ 23.44 GFLOPS | Progress: (12/20) | 9.53 s
[Task 17/25] Current/Best: 16.58/ 23.44 GFLOPS | Progress: (16/20) | 11.73 s
[Task 17/25] Current/Best: 10.06/ 23.44 GFLOPS | Progress: (20/20) | 13.87 s Done.
-
[Task 18/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 18/25] Current/Best: 11.36/ 17.69 GFLOPS | Progress: (4/20) | 3.67 s
[Task 18/25] Current/Best: 10.54/ 18.21 GFLOPS | Progress: (8/20) | 7.37 s
[Task 18/25] Current/Best: 18.63/ 18.63 GFLOPS | Progress: (12/20) | 9.30 s
[Task 18/25] Current/Best: 10.14/ 18.63 GFLOPS | Progress: (16/20) | 13.17 s
[Task 18/25] Current/Best: 20.64/ 20.64 GFLOPS | Progress: (20/20) | 14.71 s Done.
-
[Task 19/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 19/25] Current/Best: 7.29/ 20.48 GFLOPS | Progress: (4/20) | 5.95 s
[Task 19/25] Current/Best: 2.61/ 20.48 GFLOPS | Progress: (8/20) | 9.29 s
[Task 19/25] Current/Best: 16.63/ 21.78 GFLOPS | Progress: (12/20) | 12.28 s
[Task 19/25] Current/Best: 15.24/ 21.78 GFLOPS | Progress: (16/20) | 15.37 s
[Task 19/25] Current/Best: 2.70/ 23.75 GFLOPS | Progress: (20/20) | 18.18 s Done.
-
[Task 20/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 20/25] Current/Best: 9.46/ 15.45 GFLOPS | Progress: (4/20) | 3.23 s
[Task 20/25] Current/Best: 9.69/ 15.45 GFLOPS | Progress: (8/20) | 6.77 s
[Task 20/25] Current/Best: 2.32/ 16.48 GFLOPS | Progress: (12/20) | 10.70 s Done.
-
[Task 20/25] Current/Best: 12.43/ 16.48 GFLOPS | Progress: (16/20) | 14.56 s
[Task 20/25] Current/Best: 10.41/ 22.05 GFLOPS | Progress: (20/20) | 16.70 s Done.
-
[Task 21/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 21/25] Current/Best: 6.42/ 17.73 GFLOPS | Progress: (4/20) | 3.17 s
[Task 21/25] Current/Best: 14.66/ 17.73 GFLOPS | Progress: (8/20) | 4.77 s
[Task 21/25] Current/Best: 1.61/ 17.73 GFLOPS | Progress: (12/20) | 6.88 s
[Task 21/25] Current/Best: 18.05/ 18.05 GFLOPS | Progress: (16/20) | 10.33 s
[Task 21/25] Current/Best: 4.47/ 18.05 GFLOPS | Progress: (20/20) | 17.66 s
[Task 22/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 22/25] Current/Best: 2.71/ 16.96 GFLOPS | Progress: (4/20) | 2.58 s
[Task 22/25] Current/Best: 8.60/ 22.12 GFLOPS | Progress: (8/20) | 4.61 s
[Task 22/25] Current/Best: 20.00/ 22.12 GFLOPS | Progress: (12/20) | 6.98 s
[Task 22/25] Current/Best: 14.75/ 22.12 GFLOPS | Progress: (16/20) | 9.10 s
[Task 22/25] Current/Best: 13.72/ 22.12 GFLOPS | Progress: (20/20) |
10.82 s Done.
-
[Task 23/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 23/25] Current/Best: 17.73/ 20.93 GFLOPS | Progress: (4/20) | 3.12 s
[Task 23/25] Current/Best: 14.12/ 20.93 GFLOPS | Progress: (8/20) | 6.51 s
[Task 23/25] Current/Best: 21.00/ 21.84 GFLOPS | Progress: (12/20) | 8.31 s
[Task 23/25] Current/Best: 6.45/ 21.84 GFLOPS | Progress: (16/20) | 15.35 s
[Task 23/25] Current/Best: 8.00/ 21.84 GFLOPS | Progress: (20/20) | 19.53 s Done.
-
[Task 24/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 24/25] Current/Best: 8.54/ 8.54 GFLOPS | Progress: (4/20) | 14.15 s
[Task 24/25] Current/Best: 3.72/ 8.54 GFLOPS | Progress: (8/20) | 30.21 s
[Task 24/25] Current/Best: 4.31/ 8.54 GFLOPS | Progress: (12/20) | 54.06 s
[Task 24/25] Current/Best: 5.76/ 8.78 GFLOPS | Progress: (16/20) | 59.71 s Done.
-
[Task 24/25] Current/Best: 3.34/ 8.78 GFLOPS | Progress: (20/20) | 65.75 s Done.
-
[Task 25/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 25/25] Current/Best: 1.55/ 2.77 GFLOPS | Progress: (4/20) | 32.49 s
[Task 25/25] Current/Best: 5.94/ 8.12 GFLOPS | Progress: (8/20) | 323.29 s
[Task 25/25] Current/Best: 6.01/ 8.12 GFLOPS | Progress: (12/20) | 351.62 s
[Task 25/25] Current/Best: 5.78/ 9.06 GFLOPS | Progress: (16/20) | 353.42 s
[Task 25/25] Current/Best: 2.85/ 9.35 GFLOPS | Progress: (20/20) | 373.32 s
+
[Task 15/25] Current/Best: 16.19/ 17.62 GFLOPS | Progress: (4/20) | 2.55 s
[Task 15/25] Current/Best: 14.46/ 18.11 GFLOPS | Progress: (8/20) | 4.05 s
[Task 15/25] Current/Best: 10.33/ 22.37 GFLOPS | Progress: (12/20) | 6.39 s
[Task 15/25] Current/Best: 20.42/ 22.37 GFLOPS | Progress: (16/20) | 9.56 s
[Task 15/25] Current/Best: 9.65/ 22.37 GFLOPS | Progress: (20/20) | 10.73 s
[Task 16/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 16/25] Current/Best: 20.63/ 20.63 GFLOPS | Progress: (4/20) | 2.80 s
[Task 16/25] Current/Best: 3.05/ 20.63 GFLOPS | Progress: (8/20) | 4.39 s
[Task 16/25] Current/Best: 19.53/ 20.63 GFLOPS | Progress: (12/20) | 5.58 s
[Task 16/25] Current/Best: 17.87/ 20.63 GFLOPS | Progress: (16/20) | 6.95 s
[Task 16/25] Current/Best: 10.05/ 22.33 GFLOPS | Progress: (20/20) | 9.08 s Done.
+
[Task 17/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 17/25] Current/Best: 13.16/ 16.62 GFLOPS | Progress: (4/20) | 4.74 s
[Task 17/25] Current/Best: 14.40/ 23.39 GFLOPS | Progress: (8/20) | 7.58 s
[Task 17/25] Current/Best: 16.88/ 23.39 GFLOPS | Progress: (12/20) | 9.62 s
[Task 17/25] Current/Best: 16.55/ 23.39 GFLOPS | Progress: (16/20) | 11.80 s
[Task 17/25] Current/Best: 10.05/ 23.39 GFLOPS | Progress: (20/20) | 13.93 s Done.
+
[Task 18/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 18/25] Current/Best: 11.35/ 18.18 GFLOPS | Progress: (4/20) | 3.68 s
[Task 18/25] Current/Best: 10.58/ 19.37 GFLOPS | Progress: (8/20) | 7.31 s
[Task 18/25] Current/Best: 19.05/ 19.37 GFLOPS | Progress: (12/20) | 9.21 s
[Task 18/25] Current/Best: 10.08/ 19.37 GFLOPS | Progress: (16/20) | 13.07 s
[Task 18/25] Current/Best: 20.52/ 20.52 GFLOPS | Progress: (20/20) | 14.56 s Done.
+
[Task 19/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 19/25] Current/Best: 7.29/ 20.51 GFLOPS | Progress: (4/20) | 5.96 s
[Task 19/25] Current/Best: 2.61/ 20.51 GFLOPS | Progress: (8/20) | 9.31 s
[Task 19/25] Current/Best: 20.47/ 22.10 GFLOPS | Progress: (12/20) | 12.32 s
[Task 19/25] Current/Best: 14.40/ 22.10 GFLOPS | Progress: (16/20) | 15.38 s
[Task 19/25] Current/Best: 2.71/ 23.85 GFLOPS | Progress: (20/20) | 18.19 s Done.
+
[Task 20/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 20/25] Current/Best: 8.47/ 15.20 GFLOPS | Progress: (4/20) | 3.25 s
[Task 20/25] Current/Best: 9.74/ 15.20 GFLOPS | Progress: (8/20) | 6.78 s
[Task 20/25] Current/Best: 2.32/ 16.54 GFLOPS | Progress: (12/20) | 10.69 s Done.
+
[Task 20/25] Current/Best: 12.34/ 16.54 GFLOPS | Progress: (16/20) | 14.39 s
[Task 20/25] Current/Best: 11.79/ 22.35 GFLOPS | Progress: (20/20) | 16.48 s Done.
+
[Task 21/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 21/25] Current/Best: 6.43/ 17.60 GFLOPS | Progress: (4/20) | 3.16 s
[Task 21/25] Current/Best: 14.67/ 17.60 GFLOPS | Progress: (8/20) | 4.74 s
[Task 21/25] Current/Best: 1.61/ 17.60 GFLOPS | Progress: (12/20) | 6.81 s
[Task 21/25] Current/Best: 18.03/ 18.03 GFLOPS | Progress: (16/20) | 10.26 s
[Task 21/25] Current/Best: 4.46/ 18.03 GFLOPS | Progress: (20/20) | 17.49 s
[Task 22/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 22/25] Current/Best: 2.71/ 16.95 GFLOPS | Progress: (4/20) | 2.56 s
[Task 22/25] Current/Best: 8.62/ 21.89 GFLOPS | Progress: (8/20) | 4.53 s
[Task 22/25] Current/Best: 20.12/ 21.89 GFLOPS | Progress: (12/20) | 6.91 s
[Task 22/25] Current/Best: 14.81/ 21.89 GFLOPS | Progress: (16/20) | 9.02 s
[Task 22/25] Current/Best: 14.07/ 21.89 GFLOPS | Progress: (20/20) |
10.74 s Done.
+
[Task 23/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 23/25] Current/Best: 17.73/ 20.53 GFLOPS | Progress: (4/20) | 3.12 s
[Task 23/25] Current/Best: 13.50/ 20.53 GFLOPS | Progress: (8/20) | 6.41 s
[Task 23/25] Current/Best: 20.96/ 21.68 GFLOPS | Progress: (12/20) | 8.23 s
[Task 23/25] Current/Best: 6.50/ 21.68 GFLOPS | Progress: (16/20) | 15.23 s
[Task 23/25] Current/Best: 7.91/ 21.68 GFLOPS | Progress: (20/20) | 19.42 s Done.
+
[Task 24/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 24/25] Current/Best: 8.64/ 8.64 GFLOPS | Progress: (4/20) | 13.64 s
[Task 24/25] Current/Best: 2.15/ 8.64 GFLOPS | Progress: (8/20) | 30.68 s
[Task 24/25] Current/Best: 4.54/ 8.64 GFLOPS | Progress: (12/20) | 54.24 s
[Task 24/25] Current/Best: 6.37/ 9.01 GFLOPS | Progress: (16/20) | 59.88 s Done.
+
[Task 24/25] Current/Best: 3.45/ 9.01 GFLOPS | Progress: (20/20) | 65.92 s Done.
+
[Task 25/25] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/20) | 0.00 s
[Task 25/25] Current/Best: 1.55/ 2.79 GFLOPS | Progress: (4/20) | 32.28 s
[Task 25/25] Current/Best: 6.40/ 8.48 GFLOPS | Progress: (8/20) | 318.70 s
[Task 25/25] Current/Best: 5.95/ 8.48 GFLOPS | Progress: (12/20) | 347.63 s
[Task 25/25] Current/Best: 5.95/ 8.93 GFLOPS | Progress: (16/20) | 349.33 s
[Task 25/25] Current/Best: 2.89/ 9.39 GFLOPS | Progress: (20/20) | 369.17 s
The output from this tuning process will look something like this:
@@ -651,8 +651,8 @@ improvement in comparing the optimized model to the unoptimized model.
.. code-block:: none
- optimized: {'mean': 409.65435204000187, 'median': 409.6145139500095, 'std': 0.5035102469973483}
- unoptimized: {'mean': 492.0485737900015, 'median': 491.6691768000021, 'std': 0.8640093566493087}
+ optimized: {'mean': 407.073206069997, 'median': 407.2628067499977, 'std': 0.6441496962374219}
+ unoptimized: {'mean': 490.8931876000008, 'median': 490.8230106499957, 'std': 0.5127112764728715}
@@ -672,7 +672,7 @@ profiling/benchmarking.
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 16 minutes 10.276 seconds)
+ **Total running time of the script:** ( 16 minutes 4.879 seconds)
.. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 0e11edbf1..8066ec160 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -235,7 +235,7 @@ device and returns the measured cost. Network overhead is excluded.
.. code-block:: none
- 1.298e-07 secs/op
+ 1.309e-07 secs/op
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index ea1810aed..f0f50dc80 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -233,7 +233,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
.. code-block:: none
- [stage(a, placeholder(a, 0x22b57700)), stage(b, placeholder(b, 0x12236330)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
+ [stage(a, placeholder(a, 0xd0f2380)), stage(b, placeholder(b, 0x20c37150)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index 91af2d44d..01b031a62 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,17 +5,17 @@
Computation times
=================
-**18:55.209** total execution time for **tutorial** files:
+**19:07.558** total execution time for **tutorial** files:
-- **16:10.276**: :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)
-- **01:00.330**: :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
-- **00:50.794**: :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``)
-- **00:25.903**: :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)
-- **00:25.518**: :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)
-- **00:01.369**: :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)
-- **00:00.704**: :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)
-- **00:00.190**: :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
-- **00:00.032**: :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)
-- **00:00.031**: :ref:`sphx_glr_tutorial_install.py` (``install.py``)
-- **00:00.030**: :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)
-- **00:00.030**: :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
+- **16:04.879**: :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)
+- **01:11.014**: :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``)
+- **01:00.347**: :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
+- **00:25.407**: :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)
+- **00:23.610**: :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)
+- **00:01.315**: :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)
+- **00:00.690**: :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)
+- **00:00.182**: :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:00.030**: :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)
+- **00:00.029**: :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
+- **00:00.028**: :ref:`sphx_glr_tutorial_install.py` (``install.py``)
+- **00:00.027**: :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 62fb5f5c1..94412b35c 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -243,8 +243,8 @@ helper function to run a profile of the TVM generated code.
.. code-block:: none
- Numpy running time: 0.000007
- naive: 0.000008
+ Numpy running time: 0.000008
+ naive: 0.000006
@@ -438,10 +438,10 @@ We can now compare the different schedules
.. code-block:: none
Operator Timing Performance
- numpy 7.436289999986911e-06 1.0
- naive 7.6159e-06 1.0241531731566957
- parallel 6.0648e-06 0.8155679781195562
- vector 2.46243e-05 3.3113689756643896
+ numpy 8.178099999440747e-06 1.0
+ naive 5.8726e-06 0.7180885536251199
+ parallel 6.0821e-06 0.7437057507753536
+ vector 2.45371e-05 3.0003423780190936
@@ -830,7 +830,7 @@ matrix multiplication.
.. code-block:: none
- Numpy running time: 0.017948
+ Numpy running time: 0.017574
@@ -886,7 +886,7 @@ optimizations.
.. code-block:: none
- none: 3.396838
+ none: 3.388776
@@ -985,7 +985,7 @@ schedule.
.. code-block:: none
- blocking: 0.291171
+ blocking: 0.299878
@@ -1077,7 +1077,7 @@ already cache friendly from our previous optimizations.
.. code-block:: none
- vectorization: 0.323090
+ vectorization: 0.331291
@main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1149,7 +1149,7 @@ more cache friendly.
.. code-block:: none
- loop permutation: 0.116254
+ loop permutation: 0.113738
@main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1246,7 +1246,7 @@ optimized schedule.
.. code-block:: none
- array packing: 0.110528
+ array packing: 0.108084
@main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1337,7 +1337,7 @@ to `C` when all the block results are ready.
.. code-block:: none
- block caching: 0.110834
+ block caching: 0.111185
@main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1421,7 +1421,7 @@ of thread-level parallelization.
.. code-block:: none
- parallelization: 0.144285
+ parallelization: 0.144311
@main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1500,13 +1500,13 @@ working, we can compare the results.
.. code-block:: none
Operator Timing Performance
- none 3.3968376278999997 1.0
- blocking 0.2911707229 0.08571817519579475
- vectorization 0.323090222 0.09511500324486853
- loop permutation 0.11625381580000001 0.03422413095202042
- array packing 0.1105277781 0.03253843433438728
- block caching 0.11083402419999999 0.03262859057190792
- parallelization 0.1442845978 0.04247615388351663
+ none 3.3887764241999996 1.0
+ blocking 0.2998780702 0.08849154758587925
+ vectorization 0.3312914261 0.09776137007274216
+ loop permutation 0.11373797159999999 0.033563138242987074
+ array packing 0.108083941 0.03189468040091071
+ block caching 0.11118478700000001 0.03280971450521342
+ parallelization 0.144311289 0.04258507229023469
@@ -1543,7 +1543,7 @@ the computation for specific platforms.
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 1 minutes 0.330 seconds)
+ **Total running time of the script:** ( 1 minutes 0.347 seconds)
.. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
diff --git a/docs/commit_hash b/docs/commit_hash
index 3a3b1f81f..5d218762a 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-a3a4155943cd1a8ced35060902907cde2ba44cd8
+de21c8f2ef507587fdcc99b851404de5aeeb5a16
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 3a64bb0cb..c0696ad47 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -401,7 +401,7 @@
</div>
<img alt="../../_images/sphx_glr_from_mxnet_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_from_mxnet_001.png" />
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip2e9fcbdd-04fb-48d4-9551-2fca667db007 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip241648fa-9037-4f2c-98f8-146ee42e6cc7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
x (1, 3, 224, 224)
</pre></div>
</div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index a7f3ab72e..fe16a238f 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -406,49 +406,1836 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
0%| | 0.00/41.5M [00:00<?, ?B/s]
- 0%| | 16.0k/41.5M [00:00<07:45, 93.5kB/s]
- 0%| | 48.0k/41.5M [00:00<04:53, 148kB/s]
- 0%| | 96.0k/41.5M [00:00<03:28, 208kB/s]
- 0%| | 168k/41.5M [00:00<02:28, 291kB/s]
- 1%| | 336k/41.5M [00:00<01:20, 538kB/s]
- 2%|1 | 648k/41.5M [00:01<00:44, 971kB/s]
- 3%|3 | 1.27M/41.5M [00:01<00:22, 1.88MB/s]
- 6%|6 | 2.53M/41.5M [00:01<00:11, 3.68MB/s]
- 10%|9 | 4.03M/41.5M [00:01<00:07, 5.32MB/s]
- 13%|#3 | 5.52M/41.5M [00:01<00:05, 7.14MB/s]
- 16%|#5 | 6.59M/41.5M [00:01<00:04, 8.01MB/s]
- 18%|#7 | 7.44M/41.5M [00:01<00:04, 7.54MB/s]
- 20%|#9 | 8.21M/41.5M [00:02<00:05, 6.01MB/s]
- 23%|##2 | 9.52M/41.5M [00:02<00:05, 6.58MB/s]
- 25%|##4 | 10.3M/41.5M [00:02<00:05, 6.42MB/s]
- 28%|##8 | 11.8M/41.5M [00:02<00:04, 7.23MB/s]
- 32%|###1 | 13.3M/41.5M [00:02<00:03, 7.76MB/s]
- 35%|###4 | 14.4M/41.5M [00:03<00:04, 6.95MB/s]
- 38%|###8 | 15.9M/41.5M [00:03<00:03, 7.52MB/s]
- 40%|#### | 16.7M/41.5M [00:03<00:03, 6.65MB/s]
- 44%|####3 | 18.1M/41.5M [00:03<00:03, 7.24MB/s]
- 47%|####7 | 19.6M/41.5M [00:03<00:02, 7.71MB/s]
- 51%|##### | 21.1M/41.5M [00:03<00:02, 8.07MB/s]
- 55%|#####4 | 22.6M/41.5M [00:04<00:02, 8.32MB/s]
- 58%|#####8 | 24.1M/41.5M [00:04<00:01, 9.53MB/s]
- 62%|######1 | 25.6M/41.5M [00:04<00:01, 10.2MB/s]
- 64%|######4 | 26.6M/41.5M [00:04<00:02, 7.20MB/s]
- 67%|######6 | 27.6M/41.5M [00:04<00:02, 6.90MB/s]
- 68%|######8 | 28.4M/41.5M [00:04<00:02, 6.23MB/s]
- 71%|#######1 | 29.5M/41.5M [00:05<00:02, 6.27MB/s]
- 74%|#######3 | 30.6M/41.5M [00:05<00:01, 6.32MB/s]
- 76%|#######6 | 31.7M/41.5M [00:05<00:01, 6.39MB/s]
- 79%|#######9 | 32.8M/41.5M [00:05<00:01, 6.95MB/s]
- 82%|########1 | 33.9M/41.5M [00:05<00:01, 7.26MB/s]
- 84%|########4 | 35.0M/41.5M [00:05<00:00, 7.11MB/s]
- 87%|########7 | 36.2M/41.5M [00:06<00:00, 7.65MB/s]
- 89%|########9 | 36.9M/41.5M [00:06<00:00, 7.29MB/s]
- 91%|######### | 37.7M/41.5M [00:06<00:00, 6.29MB/s]
- 93%|#########2| 38.5M/41.5M [00:06<00:00, 5.96MB/s]
- 94%|#########4| 39.1M/41.5M [00:06<00:00, 5.12MB/s]
- 97%|#########6| 40.1M/41.5M [00:06<00:00, 5.32MB/s]
- 99%|#########8| 40.9M/41.5M [00:07<00:00, 5.20MB/s]
-100%|##########| 41.5M/41.5M [00:07<00:00, 6.12MB/s]
+ 0%| | 16.0k/41.5M [00:00<15:35, 46.5kB/s]
+ 0%| | 56.0k/41.5M [00:00<05:50, 124kB/s]
+ 0%| | 72.0k/41.5M [00:00<06:26, 112kB/s]
+ 0%| | 88.0k/41.5M [00:00<06:51, 105kB/s]
+ 0%| | 104k/41.5M [00:01<07:08, 101kB/s]
+ 0%| | 120k/41.5M [00:01<07:20, 98.5kB/s]
+ 0%| | 136k/41.5M [00:01<07:28, 96.7kB/s]
+ 0%| | 152k/41.5M [00:01<07:33, 95.5kB/s]
+ 0%| | 168k/41.5M [00:01<07:37, 94.6kB/s]
+ 0%| | 184k/41.5M [00:01<07:40, 94.1kB/s]
+ 0%| | 200k/41.5M [00:02<10:02, 71.9kB/s]
+ 1%| | 216k/41.5M [00:02<11:41, 61.7kB/s]
+ 1%| | 240k/41.5M [00:02<09:08, 78.9kB/s]
+ 1%| | 256k/41.5M [00:03<15:05, 47.7kB/s]
+ 1%| | 264k/41.5M [00:03<15:09, 47.5kB/s]
+ 1%| | 272k/41.5M [00:03<15:14, 47.3kB/s]
+ 1%| | 280k/41.5M [00:04<15:17, 47.1kB/s]
+ 1%| | 288k/41.5M [00:04<15:20, 46.9kB/s]
+ 1%| | 296k/41.5M [00:04<15:23, 46.8kB/s]
+ 1%| | 304k/41.5M [00:04<15:25, 46.7kB/s]
+ 1%| | 312k/41.5M [00:04<15:26, 46.6kB/s]
+ 1%| | 320k/41.5M [00:04<16:28, 43.7kB/s]
+ 1%| | 328k/41.5M [00:05<19:36, 36.7kB/s]
+ 1%| | 336k/41.5M [00:05<18:24, 39.1kB/s]
+ 1%| | 352k/41.5M [00:05<14:23, 50.0kB/s]
+ 1%| | 360k/41.5M [00:06<21:21, 33.7kB/s]
+ 1%| | 376k/41.5M [00:06<20:26, 35.2kB/s]
+ 1%| | 384k/41.5M [00:06<19:18, 37.2kB/s]
+ 1%| | 392k/41.5M [00:07<21:16, 33.8kB/s]
+ 1%| | 400k/41.5M [00:07<23:31, 30.5kB/s]
+ 1%| | 408k/41.5M [00:07<21:37, 33.2kB/s]
+ 1%| | 416k/41.5M [00:07<23:56, 30.0kB/s]
+ 1%| | 424k/41.5M [00:08<21:31, 33.3kB/s]
+ 1%|1 | 432k/41.5M [00:08<19:46, 36.3kB/s]
+ 1%|1 | 440k/41.5M [00:08<23:02, 31.1kB/s]
+ 1%|1 | 456k/41.5M [00:08<18:05, 39.6kB/s]
+ 1%|1 | 464k/41.5M [00:09<19:03, 37.6kB/s]
+ 1%|1 | 472k/41.5M [00:09<18:07, 39.6kB/s]
+ 1%|1 | 480k/41.5M [00:09<17:23, 41.2kB/s]
+ 1%|1 | 488k/41.5M [00:09<16:51, 42.5kB/s]
+ 1%|1 | 496k/41.5M [00:09<16:27, 43.6kB/s]
+ 1%|1 | 504k/41.5M [00:10<14:15, 50.3kB/s]
+ 1%|1 | 512k/41.5M [00:10<14:36, 49.1kB/s]
+ 1%|1 | 528k/41.5M [00:10<12:57, 55.2kB/s]
+ 1%|1 | 536k/41.5M [00:10<13:34, 52.8kB/s]
+ 1%|1 | 544k/41.5M [00:10<13:18, 53.8kB/s]
+ 1%|1 | 560k/41.5M [00:10<11:31, 62.0kB/s]
+ 1%|1 | 568k/41.5M [00:11<11:46, 60.8kB/s]
+ 1%|1 | 584k/41.5M [00:11<10:42, 66.7kB/s]
+ 1%|1 | 592k/41.5M [00:11<11:07, 64.3kB/s]
+ 1%|1 | 608k/41.5M [00:11<09:47, 73.0kB/s]
+ 1%|1 | 624k/41.5M [00:11<09:02, 79.0kB/s]
+ 2%|1 | 640k/41.5M [00:12<08:35, 83.2kB/s]
+ 2%|1 | 656k/41.5M [00:12<08:17, 86.1kB/s]
+ 2%|1 | 672k/41.5M [00:12<08:06, 88.1kB/s]
+ 2%|1 | 688k/41.5M [00:12<07:58, 89.5kB/s]
+ 2%|1 | 704k/41.5M [00:12<10:15, 69.6kB/s]
+ 2%|1 | 736k/41.5M [00:13<07:35, 93.9kB/s]
+ 2%|1 | 752k/41.5M [00:13<07:36, 93.5kB/s]
+ 2%|1 | 768k/41.5M [00:13<07:37, 93.3kB/s]
+ 2%|1 | 784k/41.5M [00:13<07:38, 93.2kB/s]
+ 2%|1 | 800k/41.5M [00:13<07:38, 93.1kB/s]
+ 2%|1 | 816k/41.5M [00:13<07:38, 93.0kB/s]
+ 2%|1 | 832k/41.5M [00:14<07:13, 98.4kB/s]
+ 2%|1 | 848k/41.5M [00:14<09:35, 74.1kB/s]
+ 2%|2 | 880k/41.5M [00:14<07:17, 97.3kB/s]
+ 2%|2 | 896k/41.5M [00:14<07:23, 95.9kB/s]
+ 2%|2 | 912k/41.5M [00:15<07:27, 95.1kB/s]
+ 2%|2 | 928k/41.5M [00:15<07:29, 94.7kB/s]
+ 2%|2 | 944k/41.5M [00:15<07:31, 94.1kB/s]
+ 2%|2 | 960k/41.5M [00:15<11:56, 59.4kB/s]
+ 2%|2 | 0.98M/41.5M [00:16<07:27, 95.0kB/s]
+ 2%|2 | 0.99M/41.5M [00:16<07:29, 94.5kB/s]
+ 2%|2 | 1.01M/41.5M [00:16<09:22, 75.5kB/s]
+ 2%|2 | 1.03M/41.5M [00:16<07:53, 89.5kB/s]
+ 3%|2 | 1.05M/41.5M [00:16<07:49, 90.3kB/s]
+ 3%|2 | 1.06M/41.5M [00:17<07:46, 90.9kB/s]
+ 3%|2 | 1.08M/41.5M [00:17<07:43, 91.4kB/s]
+ 3%|2 | 1.09M/41.5M [00:17<09:48, 71.9kB/s]
+ 3%|2 | 1.11M/41.5M [00:17<09:41, 72.8kB/s]
+ 3%|2 | 1.12M/41.5M [00:18<10:01, 70.4kB/s]
+ 3%|2 | 1.13M/41.5M [00:18<09:13, 76.4kB/s]
+ 3%|2 | 1.14M/41.5M [00:18<10:21, 68.1kB/s]
+ 3%|2 | 1.16M/41.5M [00:18<09:23, 75.1kB/s]
+ 3%|2 | 1.17M/41.5M [00:18<08:47, 80.2kB/s]
+ 3%|2 | 1.18M/41.5M [00:18<10:01, 70.3kB/s]
+ 3%|2 | 1.20M/41.5M [00:19<11:50, 59.4kB/s]
+ 3%|2 | 1.20M/41.5M [00:19<12:31, 56.2kB/s]
+ 3%|2 | 1.23M/41.5M [00:19<09:03, 77.7kB/s]
+ 3%|2 | 1.24M/41.5M [00:19<09:10, 76.7kB/s]
+ 3%|3 | 1.25M/41.5M [00:19<09:34, 73.4kB/s]
+ 3%|3 | 1.27M/41.5M [00:20<09:30, 73.9kB/s]
+ 3%|3 | 1.27M/41.5M [00:20<09:54, 71.0kB/s]
+ 3%|3 | 1.29M/41.5M [00:20<09:04, 77.5kB/s]
+ 3%|3 | 1.30M/41.5M [00:20<08:33, 82.1kB/s]
+ 3%|3 | 1.31M/41.5M [00:20<10:33, 66.5kB/s]
+ 3%|3 | 1.33M/41.5M [00:21<09:29, 74.0kB/s]
+ 3%|3 | 1.34M/41.5M [00:21<09:27, 74.2kB/s]
+ 3%|3 | 1.35M/41.5M [00:21<09:50, 71.3kB/s]
+ 3%|3 | 1.36M/41.5M [00:21<10:59, 63.8kB/s]
+ 3%|3 | 1.38M/41.5M [00:21<09:40, 72.5kB/s]
+ 3%|3 | 1.39M/41.5M [00:21<08:55, 78.6kB/s]
+ 3%|3 | 1.41M/41.5M [00:22<08:27, 82.8kB/s]
+ 3%|3 | 1.42M/41.5M [00:22<08:09, 85.8kB/s]
+ 3%|3 | 1.44M/41.5M [00:22<07:57, 87.9kB/s]
+ 4%|3 | 1.45M/41.5M [00:22<07:49, 89.3kB/s]
+ 4%|3 | 1.47M/41.5M [00:23<10:03, 69.5kB/s]
+ 4%|3 | 1.48M/41.5M [00:23<10:58, 63.7kB/s]
+ 4%|3 | 1.48M/41.5M [00:23<14:54, 46.9kB/s]
+ 4%|3 | 1.51M/41.5M [00:23<12:37, 55.3kB/s]
+ 4%|3 | 1.52M/41.5M [00:24<13:04, 53.5kB/s]
+ 4%|3 | 1.52M/41.5M [00:24<13:28, 51.8kB/s]
+ 4%|3 | 1.53M/41.5M [00:24<13:50, 50.5kB/s]
+ 4%|3 | 1.54M/41.5M [00:24<14:07, 49.4kB/s]
+ 4%|3 | 1.55M/41.5M [00:24<12:09, 57.4kB/s]
+ 4%|3 | 1.56M/41.5M [00:25<18:49, 37.1kB/s]
+ 4%|3 | 1.59M/41.5M [00:25<13:12, 52.8kB/s]
+ 4%|3 | 1.59M/41.5M [00:25<12:51, 54.2kB/s]
+ 4%|3 | 1.60M/41.5M [00:25<13:19, 52.3kB/s]
+ 4%|3 | 1.61M/41.5M [00:26<17:13, 40.5kB/s]
+ 4%|3 | 1.62M/41.5M [00:26<17:04, 40.8kB/s]
+ 4%|3 | 1.64M/41.5M [00:27<16:19, 42.7kB/s]
+ 4%|3 | 1.65M/41.5M [00:27<16:02, 43.4kB/s]
+ 4%|3 | 1.66M/41.5M [00:27<15:48, 44.0kB/s]
+ 4%|4 | 1.66M/41.5M [00:27<15:36, 44.6kB/s]
+ 4%|4 | 1.67M/41.5M [00:27<15:27, 45.0kB/s]
+ 4%|4 | 1.68M/41.5M [00:27<15:19, 45.4kB/s]
+ 4%|4 | 1.69M/41.5M [00:28<19:25, 35.8kB/s]
+ 4%|4 | 1.70M/41.5M [00:28<18:43, 37.1kB/s]
+ 4%|4 | 1.72M/41.5M [00:28<14:21, 48.4kB/s]
+ 4%|4 | 1.73M/41.5M [00:29<17:44, 39.2kB/s]
+ 4%|4 | 1.73M/41.5M [00:29<17:05, 40.7kB/s]
+ 4%|4 | 1.74M/41.5M [00:29<16:33, 42.0kB/s]
+ 4%|4 | 1.75M/41.5M [00:29<16:07, 43.0kB/s]
+ 4%|4 | 1.76M/41.5M [00:29<15:48, 43.9kB/s]
+ 4%|4 | 1.77M/41.5M [00:30<16:35, 41.9kB/s]
+ 4%|4 | 1.77M/41.5M [00:30<15:05, 46.0kB/s]
+ 4%|4 | 1.78M/41.5M [00:30<15:02, 46.1kB/s]
+ 4%|4 | 1.79M/41.5M [00:30<15:01, 46.2kB/s]
+ 4%|4 | 1.80M/41.5M [00:30<14:59, 46.3kB/s]
+ 4%|4 | 1.81M/41.5M [00:30<11:33, 60.0kB/s]
+ 4%|4 | 1.82M/41.5M [00:31<12:23, 56.0kB/s]
+ 4%|4 | 1.83M/41.5M [00:31<13:03, 53.1kB/s]
+ 4%|4 | 1.84M/41.5M [00:31<10:40, 64.9kB/s]
+ 4%|4 | 1.85M/41.5M [00:31<11:39, 59.4kB/s]
+ 5%|4 | 1.87M/41.5M [00:31<09:58, 69.4kB/s]
+ 5%|4 | 1.88M/41.5M [00:32<09:03, 76.3kB/s]
+ 5%|4 | 1.89M/41.5M [00:32<10:16, 67.4kB/s]
+ 5%|4 | 1.91M/41.5M [00:32<09:13, 75.0kB/s]
+ 5%|4 | 1.92M/41.5M [00:32<08:36, 80.3kB/s]
+ 5%|4 | 1.93M/41.5M [00:32<09:51, 70.2kB/s]
+ 5%|4 | 1.95M/41.5M [00:32<08:59, 76.8kB/s]
+ 5%|4 | 1.96M/41.5M [00:33<08:27, 81.6kB/s]
+ 5%|4 | 1.98M/41.5M [00:33<10:33, 65.4kB/s]
+ 5%|4 | 2.00M/41.5M [00:33<08:15, 83.6kB/s]
+ 5%|4 | 2.02M/41.5M [00:33<08:01, 86.0kB/s]
+ 5%|4 | 2.03M/41.5M [00:33<07:51, 87.8kB/s]
+ 5%|4 | 2.05M/41.5M [00:34<07:43, 89.2kB/s]
+ 5%|4 | 2.06M/41.5M [00:34<07:38, 90.2kB/s]
+ 5%|5 | 2.09M/41.5M [00:34<06:36, 104kB/s]
+ 5%|5 | 2.10M/41.5M [00:34<06:49, 101kB/s]
+ 5%|5 | 2.12M/41.5M [00:34<06:58, 98.5kB/s]
+ 5%|5 | 2.13M/41.5M [00:35<09:12, 74.7kB/s]
+ 5%|5 | 2.16M/41.5M [00:35<07:35, 90.6kB/s]
+ 5%|5 | 2.17M/41.5M [00:35<07:32, 91.1kB/s]
+ 5%|5 | 2.19M/41.5M [00:35<07:30, 91.6kB/s]
+ 5%|5 | 2.20M/41.5M [00:35<07:28, 91.9kB/s]
+ 5%|5 | 2.22M/41.5M [00:36<07:26, 92.2kB/s]
+ 5%|5 | 2.23M/41.5M [00:36<07:25, 92.3kB/s]
+ 5%|5 | 2.26M/41.5M [00:36<06:28, 106kB/s]
+ 5%|5 | 2.27M/41.5M [00:36<08:41, 78.9kB/s]
+ 6%|5 | 2.30M/41.5M [00:36<07:18, 93.6kB/s]
+ 6%|5 | 2.31M/41.5M [00:37<07:19, 93.4kB/s]
+ 6%|5 | 2.33M/41.5M [00:37<07:20, 93.2kB/s]
+ 6%|5 | 2.34M/41.5M [00:37<07:20, 93.1kB/s]
+ 6%|5 | 2.36M/41.5M [00:37<07:21, 93.0kB/s]
+ 6%|5 | 2.38M/41.5M [00:37<07:21, 92.9kB/s]
+ 6%|5 | 2.39M/41.5M [00:38<07:54, 86.3kB/s]
+ 6%|5 | 2.41M/41.5M [00:38<07:12, 94.9kB/s]
+ 6%|5 | 2.42M/41.5M [00:38<07:14, 94.2kB/s]
+ 6%|5 | 2.44M/41.5M [00:38<08:22, 81.6kB/s]
+ 6%|5 | 2.45M/41.5M [00:38<09:09, 74.5kB/s]
+ 6%|5 | 2.46M/41.5M [00:39<12:42, 53.6kB/s]
+ 6%|5 | 2.47M/41.5M [00:39<12:20, 55.3kB/s]
+ 6%|5 | 2.48M/41.5M [00:39<11:13, 60.8kB/s]
+ 6%|6 | 2.49M/41.5M [00:39<11:56, 57.1kB/s]
+ 6%|6 | 2.50M/41.5M [00:39<11:40, 58.3kB/s]
+ 6%|6 | 2.52M/41.5M [00:40<10:43, 63.5kB/s]
+ 6%|6 | 2.52M/41.5M [00:40<11:35, 58.7kB/s]
+ 6%|6 | 2.53M/41.5M [00:40<11:24, 59.6kB/s]
+ 6%|6 | 2.55M/41.5M [00:40<10:32, 64.6kB/s]
+ 6%|6 | 2.55M/41.5M [00:40<10:36, 64.2kB/s]
+ 6%|6 | 2.57M/41.5M [00:40<09:19, 73.0kB/s]
+ 6%|6 | 2.59M/41.5M [00:41<08:36, 79.0kB/s]
+ 6%|6 | 2.59M/41.5M [00:41<09:50, 69.1kB/s]
+ 6%|6 | 2.61M/41.5M [00:41<11:36, 58.5kB/s]
+ 6%|6 | 2.63M/41.5M [00:41<08:38, 78.6kB/s]
+ 6%|6 | 2.65M/41.5M [00:42<08:49, 77.0kB/s]
+ 6%|6 | 2.66M/41.5M [00:42<09:10, 74.0kB/s]
+ 6%|6 | 2.66M/41.5M [00:42<10:15, 66.2kB/s]
+ 6%|6 | 2.67M/41.5M [00:42<11:12, 60.6kB/s]
+ 6%|6 | 2.69M/41.5M [00:42<12:32, 54.1kB/s]
+ 6%|6 | 2.70M/41.5M [00:43<13:53, 48.8kB/s]
+ 7%|6 | 2.71M/41.5M [00:43<12:54, 52.5kB/s]
+ 7%|6 | 2.72M/41.5M [00:43<13:16, 51.1kB/s]
+ 7%|6 | 2.73M/41.5M [00:43<14:15, 47.5kB/s]
+ 7%|6 | 2.75M/41.5M [00:44<11:48, 57.4kB/s]
+ 7%|6 | 2.76M/41.5M [00:44<12:21, 54.8kB/s]
+ 7%|6 | 2.77M/41.5M [00:44<12:51, 52.6kB/s]
+ 7%|6 | 2.78M/41.5M [00:44<10:41, 63.3kB/s]
+ 7%|6 | 2.79M/41.5M [00:44<11:31, 58.6kB/s]
+ 7%|6 | 2.80M/41.5M [00:45<09:54, 68.3kB/s]
+ 7%|6 | 2.82M/41.5M [00:45<08:58, 75.3kB/s]
+ 7%|6 | 2.83M/41.5M [00:45<10:06, 66.9kB/s]
+ 7%|6 | 2.84M/41.5M [00:45<09:04, 74.5kB/s]
+ 7%|6 | 2.86M/41.5M [00:45<10:56, 61.7kB/s]
+ 7%|6 | 2.88M/41.5M [00:46<08:21, 80.7kB/s]
+ 7%|6 | 2.90M/41.5M [00:46<08:02, 83.8kB/s]
+ 7%|7 | 2.91M/41.5M [00:46<07:49, 86.2kB/s]
+ 7%|7 | 2.93M/41.5M [00:46<07:39, 87.9kB/s]
+ 7%|7 | 2.95M/41.5M [00:46<07:32, 89.3kB/s]
+ 7%|7 | 2.96M/41.5M [00:47<09:36, 70.1kB/s]
+ 7%|7 | 2.98M/41.5M [00:47<07:45, 86.8kB/s]
+ 7%|7 | 3.00M/41.5M [00:47<08:04, 83.3kB/s]
+ 7%|7 | 3.02M/41.5M [00:47<09:23, 71.6kB/s]
+ 7%|7 | 3.03M/41.5M [00:48<09:15, 72.6kB/s]
+ 7%|7 | 3.04M/41.5M [00:48<10:08, 66.2kB/s]
+ 7%|7 | 3.05M/41.5M [00:48<11:01, 60.9kB/s]
+ 7%|7 | 3.07M/41.5M [00:48<09:48, 68.5kB/s]
+ 7%|7 | 3.08M/41.5M [00:48<11:17, 59.4kB/s]
+ 7%|7 | 3.09M/41.5M [00:49<11:57, 56.1kB/s]
+ 7%|7 | 3.09M/41.5M [00:49<12:32, 53.5kB/s]
+ 7%|7 | 3.10M/41.5M [00:49<13:00, 51.6kB/s]
+ 7%|7 | 3.11M/41.5M [00:49<13:23, 50.1kB/s]
+ 8%|7 | 3.12M/41.5M [00:49<13:40, 49.0kB/s]
+ 8%|7 | 3.13M/41.5M [00:50<14:00, 47.8kB/s]
+ 8%|7 | 3.15M/41.5M [00:50<11:25, 58.7kB/s]
+ 8%|7 | 3.16M/41.5M [00:50<12:04, 55.5kB/s]
+ 8%|7 | 3.16M/41.5M [00:50<12:35, 53.2kB/s]
+ 8%|7 | 3.18M/41.5M [00:51<13:21, 50.1kB/s]
+ 8%|7 | 3.20M/41.5M [00:51<11:06, 60.2kB/s]
+ 8%|7 | 3.20M/41.5M [00:51<11:47, 56.7kB/s]
+ 8%|7 | 3.21M/41.5M [00:51<12:23, 54.0kB/s]
+ 8%|7 | 3.22M/41.5M [00:51<12:53, 51.9kB/s]
+ 8%|7 | 3.23M/41.5M [00:52<17:01, 39.3kB/s]
+ 8%|7 | 3.25M/41.5M [00:52<10:35, 63.1kB/s]
+ 8%|7 | 3.26M/41.5M [00:52<11:21, 58.9kB/s]
+ 8%|7 | 3.27M/41.5M [00:52<12:27, 53.6kB/s]
+ 8%|7 | 3.29M/41.5M [00:53<10:39, 62.7kB/s]
+ 8%|7 | 3.30M/41.5M [00:53<11:23, 58.6kB/s]
+ 8%|7 | 3.30M/41.5M [00:53<12:02, 55.4kB/s]
+ 8%|8 | 3.32M/41.5M [00:53<11:34, 57.6kB/s]
+ 8%|8 | 3.34M/41.5M [00:53<10:00, 66.6kB/s]
+ 8%|8 | 3.34M/41.5M [00:54<10:53, 61.2kB/s]
+ 8%|8 | 3.35M/41.5M [00:54<11:40, 57.1kB/s]
+ 8%|8 | 3.37M/41.5M [00:54<09:54, 67.2kB/s]
+ 8%|8 | 3.38M/41.5M [00:54<08:55, 74.6kB/s]
+ 8%|8 | 3.40M/41.5M [00:54<08:20, 79.8kB/s]
+ 8%|8 | 3.41M/41.5M [00:54<09:30, 70.0kB/s]
+ 8%|8 | 3.42M/41.5M [00:55<08:40, 76.8kB/s]
+ 8%|8 | 3.44M/41.5M [00:55<08:09, 81.5kB/s]
+ 8%|8 | 3.45M/41.5M [00:55<07:49, 84.9kB/s]
+ 8%|8 | 3.47M/41.5M [00:55<07:37, 87.2kB/s]
+ 8%|8 | 3.48M/41.5M [00:55<08:34, 77.4kB/s]
+ 8%|8 | 3.49M/41.5M [00:56<09:00, 73.8kB/s]
+ 8%|8 | 3.52M/41.5M [00:56<07:38, 86.9kB/s]
+ 9%|8 | 3.53M/41.5M [00:56<07:29, 88.5kB/s]
+ 9%|8 | 3.55M/41.5M [00:56<09:30, 69.7kB/s]
+ 9%|8 | 3.57M/41.5M [00:56<07:13, 91.8kB/s]
+ 9%|8 | 3.59M/41.5M [00:57<07:39, 86.5kB/s]
+ 9%|8 | 3.60M/41.5M [00:57<07:30, 88.2kB/s]
+ 9%|8 | 3.62M/41.5M [00:57<08:28, 78.1kB/s]
+ 9%|8 | 3.63M/41.5M [00:57<08:04, 81.9kB/s]
+ 9%|8 | 3.65M/41.5M [00:57<07:48, 84.7kB/s]
+ 9%|8 | 3.66M/41.5M [00:58<07:36, 87.0kB/s]
+ 9%|8 | 3.68M/41.5M [00:58<09:34, 69.1kB/s]
+ 9%|8 | 3.70M/41.5M [00:58<08:50, 74.7kB/s]
+ 9%|8 | 3.70M/41.5M [00:58<09:46, 67.5kB/s]
+ 9%|8 | 3.72M/41.5M [00:59<09:27, 69.8kB/s]
+ 9%|8 | 3.73M/41.5M [00:59<09:43, 67.9kB/s]
+ 9%|9 | 3.73M/41.5M [00:59<13:45, 48.0kB/s]
+ 9%|9 | 3.76M/41.5M [00:59<10:32, 62.6kB/s]
+ 9%|9 | 3.77M/41.5M [00:59<09:27, 69.7kB/s]
+ 9%|9 | 3.79M/41.5M [01:00<09:18, 70.7kB/s]
+ 9%|9 | 3.80M/41.5M [01:00<12:05, 54.5kB/s]
+ 9%|9 | 3.82M/41.5M [01:00<09:24, 69.9kB/s]
+ 9%|9 | 3.83M/41.5M [01:00<10:14, 64.3kB/s]
+ 9%|9 | 3.84M/41.5M [01:01<09:11, 71.5kB/s]
+ 9%|9 | 3.85M/41.5M [01:01<10:08, 64.8kB/s]
+ 9%|9 | 3.87M/41.5M [01:01<11:33, 56.8kB/s]
+ 9%|9 | 3.89M/41.5M [01:01<08:37, 76.2kB/s]
+ 9%|9 | 3.91M/41.5M [01:01<08:39, 75.8kB/s]
+ 9%|9 | 3.91M/41.5M [01:02<09:01, 72.8kB/s]
+ 9%|9 | 3.92M/41.5M [01:02<10:01, 65.4kB/s]
+ 9%|9 | 3.94M/41.5M [01:02<08:58, 73.2kB/s]
+ 10%|9 | 3.95M/41.5M [01:02<08:19, 78.8kB/s]
+ 10%|9 | 3.97M/41.5M [01:02<08:27, 77.5kB/s]
+ 10%|9 | 3.98M/41.5M [01:03<08:00, 81.8kB/s]
+ 10%|9 | 4.00M/41.5M [01:03<07:42, 84.9kB/s]
+ 10%|9 | 4.02M/41.5M [01:03<07:30, 87.2kB/s]
+ 10%|9 | 4.03M/41.5M [01:03<07:22, 88.8kB/s]
+ 10%|9 | 4.05M/41.5M [01:03<06:45, 96.8kB/s]
+ 10%|9 | 4.06M/41.5M [01:03<06:50, 95.5kB/s]
+ 10%|9 | 4.08M/41.5M [01:04<06:54, 94.7kB/s]
+ 10%|9 | 4.10M/41.5M [01:04<06:02, 108kB/s]
+ 10%|9 | 4.12M/41.5M [01:04<06:18, 104kB/s]
+ 10%|9 | 4.13M/41.5M [01:04<06:30, 100kB/s]
+ 10%|# | 4.16M/41.5M [01:04<05:49, 112kB/s]
+ 10%|# | 4.18M/41.5M [01:04<05:25, 120kB/s]
+ 10%|# | 4.20M/41.5M [01:05<05:10, 126kB/s]
+ 10%|# | 4.23M/41.5M [01:05<05:00, 130kB/s]
+ 10%|# | 4.24M/41.5M [01:05<05:52, 111kB/s]
+ 10%|# | 4.27M/41.5M [01:05<05:05, 128kB/s]
+ 10%|# | 4.29M/41.5M [01:05<05:19, 122kB/s]
+ 10%|# | 4.30M/41.5M [01:06<05:42, 114kB/s]
+ 10%|# | 4.33M/41.5M [01:06<05:21, 121kB/s]
+ 10%|# | 4.34M/41.5M [01:06<05:21, 121kB/s]
+ 11%|# | 4.37M/41.5M [01:06<05:06, 127kB/s]
+ 11%|# | 4.38M/41.5M [01:06<05:59, 108kB/s]
+ 11%|# | 4.41M/41.5M [01:06<05:09, 126kB/s]
+ 11%|# | 4.43M/41.5M [01:07<05:20, 121kB/s]
+ 11%|# | 4.45M/41.5M [01:07<05:43, 113kB/s]
+ 11%|# | 4.47M/41.5M [01:07<05:21, 121kB/s]
+ 11%|# | 4.48M/41.5M [01:07<06:17, 103kB/s]
+ 11%|# | 4.50M/41.5M [01:07<05:54, 109kB/s]
+ 11%|# | 4.52M/41.5M [01:07<05:27, 118kB/s]
+ 11%|# | 4.54M/41.5M [01:08<05:50, 111kB/s]
+ 11%|# | 4.55M/41.5M [01:08<06:07, 105kB/s]
+ 11%|#1 | 4.57M/41.5M [01:08<06:56, 92.9kB/s]
+ 11%|#1 | 4.59M/41.5M [01:08<06:49, 94.6kB/s]
+ 11%|#1 | 4.60M/41.5M [01:08<06:51, 94.1kB/s]
+ 11%|#1 | 4.62M/41.5M [01:09<07:00, 91.9kB/s]
+ 11%|#1 | 4.63M/41.5M [01:09<06:59, 92.2kB/s]
+ 11%|#1 | 4.65M/41.5M [01:09<06:50, 94.2kB/s]
+ 11%|#1 | 4.66M/41.5M [01:09<06:51, 93.7kB/s]
+ 11%|#1 | 4.68M/41.5M [01:09<06:53, 93.4kB/s]
+ 11%|#1 | 4.70M/41.5M [01:09<06:01, 107kB/s]
+ 11%|#1 | 4.71M/41.5M [01:10<05:46, 111kB/s]
+ 11%|#1 | 4.73M/41.5M [01:10<05:34, 115kB/s]
+ 11%|#1 | 4.74M/41.5M [01:10<05:58, 107kB/s]
+ 11%|#1 | 4.76M/41.5M [01:10<06:07, 105kB/s]
+ 12%|#1 | 4.77M/41.5M [01:10<06:21, 101kB/s]
+ 12%|#1 | 4.79M/41.5M [01:10<05:39, 113kB/s]
+ 12%|#1 | 4.80M/41.5M [01:10<05:33, 115kB/s]
+ 12%|#1 | 4.82M/41.5M [01:11<05:13, 123kB/s]
+ 12%|#1 | 4.84M/41.5M [01:11<05:43, 112kB/s]
+ 12%|#1 | 4.85M/41.5M [01:11<05:12, 123kB/s]
+ 12%|#1 | 4.87M/41.5M [01:11<06:35, 97.2kB/s]
+ 12%|#1 | 4.89M/41.5M [01:11<05:48, 110kB/s]
+ 12%|#1 | 4.91M/41.5M [01:11<06:05, 105kB/s]
+ 12%|#1 | 4.92M/41.5M [01:12<06:18, 101kB/s]
+ 12%|#1 | 4.95M/41.5M [01:12<05:40, 113kB/s]
+ 12%|#1 | 4.97M/41.5M [01:12<05:17, 121kB/s]
+ 12%|#2 | 4.98M/41.5M [01:12<05:40, 112kB/s]
+ 12%|#2 | 5.00M/41.5M [01:12<07:48, 81.7kB/s]
+ 12%|#2 | 5.02M/41.5M [01:13<06:38, 96.0kB/s]
+ 12%|#2 | 5.05M/41.5M [01:13<05:19, 119kB/s]
+ 12%|#2 | 5.07M/41.5M [01:13<05:40, 112kB/s]
+ 12%|#2 | 5.09M/41.5M [01:13<05:57, 107kB/s]
+ 12%|#2 | 5.11M/41.5M [01:13<05:28, 116kB/s]
+ 12%|#2 | 5.12M/41.5M [01:14<06:26, 98.7kB/s]
+ 12%|#2 | 5.15M/41.5M [01:14<05:47, 110kB/s]
+ 12%|#2 | 5.16M/41.5M [01:14<06:02, 105kB/s]
+ 12%|#2 | 5.18M/41.5M [01:14<05:36, 113kB/s]
+ 13%|#2 | 5.20M/41.5M [01:14<05:56, 107kB/s]
+ 13%|#2 | 5.21M/41.5M [01:14<06:11, 102kB/s]
+ 13%|#2 | 5.23M/41.5M [01:15<06:22, 99.5kB/s]
+ 13%|#2 | 5.24M/41.5M [01:15<06:29, 97.5kB/s]
+ 13%|#2 | 5.26M/41.5M [01:15<06:35, 96.0kB/s]
+ 13%|#2 | 5.27M/41.5M [01:15<05:50, 108kB/s]
+ 13%|#2 | 5.29M/41.5M [01:15<05:39, 112kB/s]
+ 13%|#2 | 5.30M/41.5M [01:15<05:47, 109kB/s]
+ 13%|#2 | 5.32M/41.5M [01:15<05:34, 113kB/s]
+ 13%|#2 | 5.34M/41.5M [01:16<05:06, 124kB/s]
+ 13%|#2 | 5.35M/41.5M [01:16<04:56, 128kB/s]
+ 13%|#2 | 5.37M/41.5M [01:16<05:29, 115kB/s]
+ 13%|#2 | 5.38M/41.5M [01:16<07:23, 85.3kB/s]
+ 13%|#3 | 5.42M/41.5M [01:16<04:58, 127kB/s]
+ 13%|#3 | 5.44M/41.5M [01:16<04:45, 132kB/s]
+ 13%|#3 | 5.45M/41.5M [01:17<05:15, 120kB/s]
+ 13%|#3 | 5.47M/41.5M [01:17<05:03, 124kB/s]
+ 13%|#3 | 5.48M/41.5M [01:17<07:59, 78.7kB/s]
+ 13%|#3 | 5.52M/41.5M [01:17<04:56, 127kB/s]
+ 13%|#3 | 5.55M/41.5M [01:18<05:15, 119kB/s]
+ 13%|#3 | 5.56M/41.5M [01:18<06:34, 95.4kB/s]
+ 13%|#3 | 5.59M/41.5M [01:18<06:24, 97.9kB/s]
+ 14%|#3 | 5.60M/41.5M [01:18<06:29, 96.6kB/s]
+ 14%|#3 | 5.62M/41.5M [01:18<06:33, 95.6kB/s]
+ 14%|#3 | 5.63M/41.5M [01:19<06:36, 94.9kB/s]
+ 14%|#3 | 5.65M/41.5M [01:19<05:58, 105kB/s]
+ 14%|#3 | 5.66M/41.5M [01:19<05:44, 109kB/s]
+ 14%|#3 | 5.68M/41.5M [01:19<06:01, 104kB/s]
+ 14%|#3 | 5.70M/41.5M [01:19<05:55, 106kB/s]
+ 14%|#3 | 5.71M/41.5M [01:19<05:40, 110kB/s]
+ 14%|#3 | 5.73M/41.5M [01:19<05:59, 104kB/s]
+ 14%|#3 | 5.74M/41.5M [01:20<06:12, 101kB/s]
+ 14%|#3 | 5.76M/41.5M [01:20<05:38, 111kB/s]
+ 14%|#3 | 5.77M/41.5M [01:20<05:29, 114kB/s]
+ 14%|#3 | 5.79M/41.5M [01:20<05:51, 107kB/s]
+ 14%|#3 | 5.80M/41.5M [01:20<05:18, 117kB/s]
+ 14%|#4 | 5.82M/41.5M [01:20<05:27, 114kB/s]
+ 14%|#4 | 5.84M/41.5M [01:20<05:21, 116kB/s]
+ 14%|#4 | 5.85M/41.5M [01:21<04:58, 125kB/s]
+ 14%|#4 | 5.87M/41.5M [01:21<05:01, 124kB/s]
+ 14%|#4 | 5.88M/41.5M [01:21<04:47, 130kB/s]
+ 14%|#4 | 5.90M/41.5M [01:21<04:53, 127kB/s]
+ 14%|#4 | 5.91M/41.5M [01:21<07:27, 83.4kB/s]
+ 14%|#4 | 5.95M/41.5M [01:21<04:25, 140kB/s]
+ 14%|#4 | 5.98M/41.5M [01:22<04:45, 130kB/s]
+ 14%|#4 | 6.00M/41.5M [01:22<04:53, 127kB/s]
+ 14%|#4 | 6.02M/41.5M [01:22<04:41, 132kB/s]
+ 15%|#4 | 6.03M/41.5M [01:22<05:10, 120kB/s]
+ 15%|#4 | 6.05M/41.5M [01:22<05:58, 104kB/s]
+ 15%|#4 | 6.06M/41.5M [01:23<06:09, 101kB/s]
+ 15%|#4 | 6.09M/41.5M [01:23<07:09, 86.5kB/s]
+ 15%|#4 | 6.11M/41.5M [01:23<05:52, 105kB/s]
+ 15%|#4 | 6.12M/41.5M [01:23<06:03, 102kB/s]
+ 15%|#4 | 6.14M/41.5M [01:23<06:13, 99.3kB/s]
+ 15%|#4 | 6.16M/41.5M [01:24<06:20, 97.3kB/s]
+ 15%|#4 | 6.17M/41.5M [01:24<10:08, 60.8kB/s]
+ 15%|#4 | 6.20M/41.5M [01:24<07:06, 86.7kB/s]
+ 15%|#4 | 6.22M/41.5M [01:24<07:44, 79.7kB/s]
+ 15%|#5 | 6.23M/41.5M [01:25<07:27, 82.6kB/s]
+ 15%|#5 | 6.25M/41.5M [01:25<07:14, 85.1kB/s]
+ 15%|#5 | 6.27M/41.5M [01:25<07:04, 87.1kB/s]
+ 15%|#5 | 6.28M/41.5M [01:25<06:56, 88.6kB/s]
+ 15%|#5 | 6.30M/41.5M [01:25<06:50, 89.8kB/s]
+ 15%|#5 | 6.31M/41.5M [01:26<06:46, 90.6kB/s]
+ 15%|#5 | 6.33M/41.5M [01:26<07:47, 78.8kB/s]
+ 15%|#5 | 6.35M/41.5M [01:26<07:16, 84.4kB/s]
+ 15%|#5 | 6.37M/41.5M [01:26<07:05, 86.5kB/s]
+ 15%|#5 | 6.38M/41.5M [01:26<06:57, 88.2kB/s]
+ 15%|#5 | 6.40M/41.5M [01:27<06:51, 89.4kB/s]
+ 15%|#5 | 6.41M/41.5M [01:27<06:46, 90.4kB/s]
+ 15%|#5 | 6.43M/41.5M [01:27<06:43, 91.1kB/s]
+ 16%|#5 | 6.45M/41.5M [01:27<06:41, 91.6kB/s]
+ 16%|#5 | 6.46M/41.5M [01:27<08:36, 71.1kB/s]
+ 16%|#5 | 6.49M/41.5M [01:28<06:10, 99.1kB/s]
+ 16%|#5 | 6.51M/41.5M [01:28<06:17, 97.3kB/s]
+ 16%|#5 | 6.52M/41.5M [01:28<06:21, 96.1kB/s]
+ 16%|#5 | 6.54M/41.5M [01:28<06:24, 95.3kB/s]
+ 16%|#5 | 6.55M/41.5M [01:28<06:27, 94.6kB/s]
+ 16%|#5 | 6.57M/41.5M [01:29<06:29, 94.1kB/s]
+ 16%|#5 | 6.59M/41.5M [01:29<06:30, 93.7kB/s]
+ 16%|#5 | 6.60M/41.5M [01:29<06:31, 93.4kB/s]
+ 16%|#5 | 6.62M/41.5M [01:29<06:32, 93.2kB/s]
+ 16%|#5 | 6.63M/41.5M [01:29<06:32, 93.1kB/s]
+ 16%|#6 | 6.65M/41.5M [01:30<08:30, 71.6kB/s]
+ 16%|#6 | 6.68M/41.5M [01:30<06:05, 99.8kB/s]
+ 16%|#6 | 6.70M/41.5M [01:30<06:12, 97.9kB/s]
+ 16%|#6 | 6.71M/41.5M [01:30<08:01, 75.8kB/s]
+ 16%|#6 | 6.74M/41.5M [01:31<05:59, 101kB/s]
+ 16%|#6 | 6.76M/41.5M [01:31<06:06, 99.3kB/s]
+ 16%|#6 | 6.77M/41.5M [01:31<06:13, 97.5kB/s]
+ 16%|#6 | 6.79M/41.5M [01:31<06:42, 90.4kB/s]
+ 16%|#6 | 6.80M/41.5M [01:31<07:09, 84.6kB/s]
+ 16%|#6 | 6.82M/41.5M [01:31<06:27, 93.7kB/s]
+ 16%|#6 | 6.84M/41.5M [01:32<06:28, 93.4kB/s]
+ 17%|#6 | 6.85M/41.5M [01:32<08:23, 72.1kB/s]
+ 17%|#6 | 6.88M/41.5M [01:32<06:26, 93.8kB/s]
+ 17%|#6 | 6.89M/41.5M [01:32<07:22, 82.1kB/s]
+ 17%|#6 | 6.91M/41.5M [01:33<07:07, 84.8kB/s]
+ 17%|#6 | 6.93M/41.5M [01:33<06:51, 88.0kB/s]
+ 17%|#6 | 6.95M/41.5M [01:33<06:46, 89.2kB/s]
+ 17%|#6 | 6.96M/41.5M [01:33<06:41, 90.1kB/s]
+ 17%|#6 | 6.98M/41.5M [01:33<06:38, 90.9kB/s]
+ 17%|#6 | 6.99M/41.5M [01:34<06:35, 91.4kB/s]
+ 17%|#6 | 7.02M/41.5M [01:34<07:22, 81.6kB/s]
+ 17%|#6 | 7.05M/41.5M [01:34<05:38, 107kB/s]
+ 17%|#7 | 7.06M/41.5M [01:34<05:50, 103kB/s]
+ 17%|#7 | 7.08M/41.5M [01:34<05:59, 100kB/s]
+ 17%|#7 | 7.09M/41.5M [01:35<06:07, 98.2kB/s]
+ 17%|#7 | 7.11M/41.5M [01:35<07:59, 75.2kB/s]
+ 17%|#7 | 7.13M/41.5M [01:35<06:37, 90.6kB/s]
+ 17%|#7 | 7.15M/41.5M [01:35<06:35, 91.1kB/s]
+ 17%|#7 | 7.16M/41.5M [01:35<06:33, 91.5kB/s]
+ 17%|#7 | 7.18M/41.5M [01:36<06:31, 91.9kB/s]
+ 17%|#7 | 7.20M/41.5M [01:36<06:30, 92.1kB/s]
+ 17%|#7 | 7.21M/41.5M [01:36<06:29, 92.3kB/s]
+ 17%|#7 | 7.23M/41.5M [01:36<06:28, 92.5kB/s]
+ 17%|#7 | 7.24M/41.5M [01:36<06:04, 98.4kB/s]
+ 18%|#7 | 7.27M/41.5M [01:37<05:43, 104kB/s]
+ 18%|#7 | 7.28M/41.5M [01:37<05:55, 101kB/s]
+ 18%|#7 | 7.30M/41.5M [01:37<07:51, 76.1kB/s]
+ 18%|#7 | 7.33M/41.5M [01:37<05:31, 108kB/s]
+ 18%|#7 | 7.34M/41.5M [01:37<06:03, 98.6kB/s]
+ 18%|#7 | 7.36M/41.5M [01:38<07:27, 80.0kB/s]
+ 18%|#7 | 7.38M/41.5M [01:38<07:31, 79.2kB/s]
+ 18%|#7 | 7.39M/41.5M [01:38<08:59, 66.2kB/s]
+ 18%|#7 | 7.41M/41.5M [01:38<08:15, 72.1kB/s]
+ 18%|#7 | 7.41M/41.5M [01:39<11:12, 53.1kB/s]
+ 18%|#7 | 7.44M/41.5M [01:39<08:17, 71.7kB/s]
+ 18%|#7 | 7.45M/41.5M [01:39<09:35, 62.1kB/s]
+ 18%|#7 | 7.46M/41.5M [01:40<10:07, 58.7kB/s]
+ 18%|#8 | 7.48M/41.5M [01:40<09:25, 63.1kB/s]
+ 18%|#8 | 7.48M/41.5M [01:40<09:28, 62.7kB/s]
+ 18%|#8 | 7.49M/41.5M [01:40<10:10, 58.4kB/s]
+ 18%|#8 | 7.51M/41.5M [01:40<08:44, 67.9kB/s]
+ 18%|#8 | 7.52M/41.5M [01:40<09:36, 61.8kB/s]
+ 18%|#8 | 7.53M/41.5M [01:41<08:23, 70.7kB/s]
+ 18%|#8 | 7.55M/41.5M [01:41<07:41, 77.1kB/s]
+ 18%|#8 | 7.56M/41.5M [01:41<07:15, 81.7kB/s]
+ 18%|#8 | 7.58M/41.5M [01:41<07:27, 79.4kB/s]
+ 18%|#8 | 7.59M/41.5M [01:41<07:53, 75.1kB/s]
+ 18%|#8 | 7.59M/41.5M [01:42<09:34, 61.9kB/s]
+ 18%|#8 | 7.62M/41.5M [01:42<06:34, 90.1kB/s]
+ 18%|#8 | 7.63M/41.5M [01:42<06:30, 90.9kB/s]
+ 18%|#8 | 7.65M/41.5M [01:42<08:25, 70.2kB/s]
+ 18%|#8 | 7.67M/41.5M [01:42<06:45, 87.4kB/s]
+ 19%|#8 | 7.69M/41.5M [01:43<08:24, 70.3kB/s]
+ 19%|#8 | 7.70M/41.5M [01:43<07:49, 75.5kB/s]
+ 19%|#8 | 7.72M/41.5M [01:43<09:13, 64.0kB/s]
+ 19%|#8 | 7.73M/41.5M [01:43<08:23, 70.3kB/s]
+ 19%|#8 | 7.74M/41.5M [01:44<09:07, 64.6kB/s]
+ 19%|#8 | 7.76M/41.5M [01:44<14:48, 39.8kB/s]
+ 19%|#8 | 7.79M/41.5M [01:44<08:57, 65.7kB/s]
+ 19%|#8 | 7.80M/41.5M [01:45<08:17, 71.1kB/s]
+ 19%|#8 | 7.82M/41.5M [01:45<11:10, 52.7kB/s]
+ 19%|#8 | 7.84M/41.5M [01:45<08:37, 68.2kB/s]
+ 19%|#8 | 7.86M/41.5M [01:46<09:40, 60.8kB/s]
+ 19%|#8 | 7.88M/41.5M [01:46<08:45, 67.0kB/s]
+ 19%|#9 | 7.89M/41.5M [01:46<08:05, 72.6kB/s]
+ 19%|#9 | 7.91M/41.5M [01:46<07:59, 73.4kB/s]
+ 19%|#9 | 7.92M/41.5M [01:47<08:54, 65.8kB/s]
+ 19%|#9 | 7.94M/41.5M [01:47<08:09, 71.9kB/s]
+ 19%|#9 | 7.95M/41.5M [01:47<07:36, 77.0kB/s]
+ 19%|#9 | 7.97M/41.5M [01:47<07:13, 81.1kB/s]
+ 19%|#9 | 7.98M/41.5M [01:47<08:50, 66.2kB/s]
+ 19%|#9 | 8.00M/41.5M [01:48<08:04, 72.4kB/s]
+ 19%|#9 | 8.01M/41.5M [01:48<08:52, 65.9kB/s]
+ 19%|#9 | 8.02M/41.5M [01:48<08:01, 72.9kB/s]
+ 19%|#9 | 8.04M/41.5M [01:48<07:27, 78.4kB/s]
+ 19%|#9 | 8.05M/41.5M [01:49<09:04, 64.3kB/s]
+ 19%|#9 | 8.08M/41.5M [01:49<07:30, 77.8kB/s]
+ 20%|#9 | 8.09M/41.5M [01:49<08:30, 68.6kB/s]
+ 20%|#9 | 8.11M/41.5M [01:49<10:31, 55.4kB/s]
+ 20%|#9 | 8.14M/41.5M [01:50<07:56, 73.4kB/s]
+ 20%|#9 | 8.16M/41.5M [01:50<07:54, 73.7kB/s]
+ 20%|#9 | 8.16M/41.5M [01:50<08:08, 71.5kB/s]
+ 20%|#9 | 8.18M/41.5M [01:50<07:35, 76.7kB/s]
+ 20%|#9 | 8.20M/41.5M [01:51<07:38, 76.2kB/s]
+ 20%|#9 | 8.20M/41.5M [01:51<11:14, 51.7kB/s]
+ 20%|#9 | 8.23M/41.5M [01:52<10:53, 53.4kB/s]
+ 20%|#9 | 8.26M/41.5M [01:52<08:32, 68.0kB/s]
+ 20%|#9 | 8.27M/41.5M [01:52<08:12, 70.7kB/s]
+ 20%|#9 | 8.29M/41.5M [01:52<09:20, 62.1kB/s]
+ 20%|#9 | 8.30M/41.5M [01:52<09:50, 59.0kB/s]
+ 20%|## | 8.30M/41.5M [01:53<09:57, 58.2kB/s]
+ 20%|## | 8.32M/41.5M [01:53<09:01, 64.2kB/s]
+ 20%|## | 8.33M/41.5M [01:53<09:18, 62.3kB/s]
+ 20%|## | 8.34M/41.5M [01:54<17:56, 32.3kB/s]
+ 20%|## | 8.39M/41.5M [01:54<08:07, 71.2kB/s]
+ 20%|## | 8.41M/41.5M [01:54<07:41, 75.1kB/s]
+ 20%|## | 8.42M/41.5M [01:55<08:51, 65.3kB/s]
+ 20%|## | 8.44M/41.5M [01:55<08:10, 70.7kB/s]
+ 20%|## | 8.45M/41.5M [01:55<07:38, 75.5kB/s]
+ 20%|## | 8.47M/41.5M [01:55<07:14, 79.6kB/s]
+ 20%|## | 8.48M/41.5M [01:55<06:57, 82.9kB/s]
+ 20%|## | 8.50M/41.5M [01:55<06:44, 85.6kB/s]
+ 21%|## | 8.52M/41.5M [01:56<06:34, 87.5kB/s]
+ 21%|## | 8.53M/41.5M [01:56<06:28, 89.0kB/s]
+ 21%|## | 8.55M/41.5M [01:56<06:23, 90.1kB/s]
+ 21%|## | 8.56M/41.5M [01:56<06:19, 90.9kB/s]
+ 21%|## | 8.58M/41.5M [01:56<06:17, 91.4kB/s]
+ 21%|## | 8.59M/41.5M [01:56<06:15, 91.8kB/s]
+ 21%|## | 8.61M/41.5M [01:57<06:14, 92.1kB/s]
+ 21%|## | 8.62M/41.5M [01:57<08:05, 71.0kB/s]
+ 21%|## | 8.65M/41.5M [01:57<06:32, 87.8kB/s]
+ 21%|## | 8.66M/41.5M [01:57<06:26, 89.1kB/s]
+ 21%|## | 8.68M/41.5M [01:58<06:21, 90.1kB/s]
+ 21%|## | 8.70M/41.5M [01:58<05:31, 104kB/s]
+ 21%|##1 | 8.72M/41.5M [01:58<07:42, 74.3kB/s]
+ 21%|##1 | 8.76M/41.5M [01:58<04:52, 117kB/s]
+ 21%|##1 | 8.77M/41.5M [01:58<05:09, 111kB/s]
+ 21%|##1 | 8.79M/41.5M [01:59<05:23, 106kB/s]
+ 21%|##1 | 8.80M/41.5M [01:59<05:35, 102kB/s]
+ 21%|##1 | 8.82M/41.5M [01:59<05:43, 99.6kB/s]
+ 21%|##1 | 8.84M/41.5M [01:59<05:50, 97.6kB/s]
+ 21%|##1 | 8.86M/41.5M [01:59<05:11, 110kB/s]
+ 21%|##1 | 8.88M/41.5M [01:59<05:26, 105kB/s]
+ 21%|##1 | 8.90M/41.5M [02:00<04:57, 115kB/s]
+ 21%|##1 | 8.91M/41.5M [02:00<08:23, 67.8kB/s]
+ 22%|##1 | 8.97M/41.5M [02:00<05:07, 111kB/s]
+ 22%|##1 | 8.98M/41.5M [02:01<06:27, 88.0kB/s]
+ 22%|##1 | 9.01M/41.5M [02:01<05:46, 98.3kB/s]
+ 22%|##1 | 9.02M/41.5M [02:01<05:50, 97.1kB/s]
+ 22%|##1 | 9.04M/41.5M [02:01<05:54, 96.0kB/s]
+ 22%|##1 | 9.05M/41.5M [02:02<05:57, 95.2kB/s]
+ 22%|##1 | 9.07M/41.5M [02:02<05:59, 94.5kB/s]
+ 22%|##1 | 9.09M/41.5M [02:02<06:01, 94.0kB/s]
+ 22%|##1 | 9.10M/41.5M [02:02<06:02, 93.7kB/s]
+ 22%|##1 | 9.12M/41.5M [02:02<06:03, 93.4kB/s]
+ 22%|##2 | 9.13M/41.5M [02:02<06:03, 93.2kB/s]
+ 22%|##2 | 9.15M/41.5M [02:03<06:04, 93.1kB/s]
+ 22%|##2 | 9.16M/41.5M [02:03<06:04, 93.0kB/s]
+ 22%|##2 | 9.18M/41.5M [02:03<05:42, 99.0kB/s]
+ 22%|##2 | 9.20M/41.5M [02:03<05:48, 97.0kB/s]
+ 22%|##2 | 9.21M/41.5M [02:03<08:05, 69.7kB/s]
+ 22%|##2 | 9.25M/41.5M [02:04<04:55, 115kB/s]
+ 22%|##2 | 9.27M/41.5M [02:04<05:10, 109kB/s]
+ 22%|##2 | 9.29M/41.5M [02:04<04:48, 117kB/s]
+ 22%|##2 | 9.30M/41.5M [02:04<05:06, 110kB/s]
+ 22%|##2 | 9.33M/41.5M [02:04<04:44, 119kB/s]
+ 23%|##2 | 9.34M/41.5M [02:04<05:03, 111kB/s]
+ 23%|##2 | 9.37M/41.5M [02:05<04:42, 119kB/s]
+ 23%|##2 | 9.39M/41.5M [02:05<04:29, 125kB/s]
+ 23%|##2 | 9.41M/41.5M [02:05<04:51, 116kB/s]
+ 23%|##2 | 9.43M/41.5M [02:05<04:34, 123kB/s]
+ 23%|##2 | 9.45M/41.5M [02:05<04:23, 128kB/s]
+ 23%|##2 | 9.47M/41.5M [02:06<06:51, 81.7kB/s]
+ 23%|##2 | 9.52M/41.5M [02:06<04:09, 134kB/s]
+ 23%|##2 | 9.54M/41.5M [02:06<04:57, 112kB/s]
+ 23%|##3 | 9.55M/41.5M [02:06<05:10, 108kB/s]
+ 23%|##3 | 9.58M/41.5M [02:07<07:14, 77.0kB/s]
+ 23%|##3 | 9.63M/41.5M [02:07<04:38, 120kB/s]
+ 23%|##3 | 9.65M/41.5M [02:07<04:39, 119kB/s]
+ 23%|##3 | 9.66M/41.5M [02:07<04:55, 113kB/s]
+ 23%|##3 | 9.68M/41.5M [02:08<05:09, 108kB/s]
+ 23%|##3 | 9.70M/41.5M [02:08<05:21, 104kB/s]
+ 23%|##3 | 9.72M/41.5M [02:08<05:09, 108kB/s]
+ 23%|##3 | 9.73M/41.5M [02:08<05:21, 104kB/s]
+ 23%|##3 | 9.75M/41.5M [02:08<05:12, 107kB/s]
+ 24%|##3 | 9.77M/41.5M [02:09<05:03, 110kB/s]
+ 24%|##3 | 9.79M/41.5M [02:09<04:58, 111kB/s]
+ 24%|##3 | 9.80M/41.5M [02:09<07:09, 77.4kB/s]
+ 24%|##3 | 9.84M/41.5M [02:09<05:05, 108kB/s]
+ 24%|##3 | 9.85M/41.5M [02:09<05:18, 104kB/s]
+ 24%|##3 | 9.87M/41.5M [02:10<05:27, 101kB/s]
+ 24%|##3 | 9.89M/41.5M [02:10<04:56, 112kB/s]
+ 24%|##3 | 9.91M/41.5M [02:10<05:11, 106kB/s]
+ 24%|##3 | 9.92M/41.5M [02:10<06:57, 79.2kB/s]
+ 24%|##4 | 9.96M/41.5M [02:11<04:57, 111kB/s]
+ 24%|##4 | 9.98M/41.5M [02:11<05:10, 107kB/s]
+ 24%|##4 | 10.0M/41.5M [02:11<04:46, 115kB/s]
+ 24%|##4 | 10.0M/41.5M [02:11<06:25, 85.5kB/s]
+ 24%|##4 | 10.0M/41.5M [02:11<05:02, 109kB/s]
+ 24%|##4 | 10.1M/41.5M [02:12<05:14, 105kB/s]
+ 24%|##4 | 10.1M/41.5M [02:12<05:23, 102kB/s]
+ 24%|##4 | 10.1M/41.5M [02:12<08:33, 64.1kB/s]
+ 24%|##4 | 10.1M/41.5M [02:12<05:35, 97.9kB/s]
+ 24%|##4 | 10.1M/41.5M [02:13<06:56, 79.0kB/s]
+ 24%|##4 | 10.2M/41.5M [02:13<06:40, 81.9kB/s]
+ 25%|##4 | 10.2M/41.5M [02:13<06:29, 84.4kB/s]
+ 25%|##4 | 10.2M/41.5M [02:14<07:52, 69.4kB/s]
+ 25%|##4 | 10.2M/41.5M [02:14<10:58, 49.8kB/s]
+ 25%|##4 | 10.2M/41.5M [02:14<08:00, 68.2kB/s]
+ 25%|##4 | 10.2M/41.5M [02:15<08:12, 66.6kB/s]
+ 25%|##4 | 10.3M/41.5M [02:15<08:23, 65.1kB/s]
+ 25%|##4 | 10.3M/41.5M [02:15<11:46, 46.3kB/s]
+ 25%|##4 | 10.3M/41.5M [02:16<08:30, 64.0kB/s]
+ 25%|##4 | 10.3M/41.5M [02:16<07:52, 69.1kB/s]
+ 25%|##4 | 10.3M/41.5M [02:16<08:51, 61.4kB/s]
+ 25%|##4 | 10.4M/41.5M [02:16<08:03, 67.5kB/s]
+ 25%|##5 | 10.4M/41.5M [02:17<07:26, 73.0kB/s]
+ 25%|##5 | 10.4M/41.5M [02:17<10:33, 51.5kB/s]
+ 25%|##5 | 10.4M/41.5M [02:17<06:57, 78.0kB/s]
+ 25%|##5 | 10.4M/41.5M [02:18<08:17, 65.4kB/s]
+ 25%|##5 | 10.5M/41.5M [02:18<08:58, 60.4kB/s]
+ 25%|##5 | 10.5M/41.5M [02:18<09:23, 57.7kB/s]
+ 25%|##5 | 10.5M/41.5M [02:18<09:48, 55.3kB/s]
+ 25%|##5 | 10.5M/41.5M [02:18<10:11, 53.2kB/s]
+ 25%|##5 | 10.5M/41.5M [02:19<08:34, 63.1kB/s]
+ 25%|##5 | 10.5M/41.5M [02:19<09:13, 58.7kB/s]
+ 25%|##5 | 10.5M/41.5M [02:19<09:47, 55.3kB/s]
+ 25%|##5 | 10.5M/41.5M [02:19<08:13, 65.9kB/s]
+ 25%|##5 | 10.5M/41.5M [02:19<08:58, 60.3kB/s]
+ 25%|##5 | 10.5M/41.5M [02:20<07:45, 69.7kB/s]
+ 25%|##5 | 10.6M/41.5M [02:20<08:35, 62.9kB/s]
+ 25%|##5 | 10.6M/41.5M [02:20<07:32, 71.7kB/s]
+ 26%|##5 | 10.6M/41.5M [02:20<08:59, 60.1kB/s]
+ 26%|##5 | 10.6M/41.5M [02:20<06:46, 79.6kB/s]
+ 26%|##5 | 10.6M/41.5M [02:21<06:29, 83.0kB/s]
+ 26%|##5 | 10.6M/41.5M [02:21<08:00, 67.4kB/s]
+ 26%|##5 | 10.7M/41.5M [02:21<07:21, 73.3kB/s]
+ 26%|##5 | 10.7M/41.5M [02:21<06:53, 78.2kB/s]
+ 26%|##5 | 10.7M/41.5M [02:22<08:18, 64.8kB/s]
+ 26%|##5 | 10.7M/41.5M [02:22<06:34, 81.9kB/s]
+ 26%|##5 | 10.7M/41.5M [02:22<09:51, 54.5kB/s]
+ 26%|##5 | 10.8M/41.5M [02:23<07:39, 70.1kB/s]
+ 26%|##5 | 10.8M/41.5M [02:23<08:40, 61.9kB/s]
+ 26%|##5 | 10.8M/41.5M [02:23<08:41, 61.7kB/s]
+ 26%|##6 | 10.8M/41.5M [02:23<08:13, 65.3kB/s]
+ 26%|##6 | 10.8M/41.5M [02:23<08:19, 64.4kB/s]
+ 26%|##6 | 10.8M/41.5M [02:24<07:55, 67.7kB/s]
+ 26%|##6 | 10.8M/41.5M [02:24<08:05, 66.2kB/s]
+ 26%|##6 | 10.8M/41.5M [02:24<07:45, 69.1kB/s]
+ 26%|##6 | 10.8M/41.5M [02:24<07:58, 67.2kB/s]
+ 26%|##6 | 10.9M/41.5M [02:24<07:09, 74.8kB/s]
+ 26%|##6 | 10.9M/41.5M [02:24<06:40, 80.1kB/s]
+ 26%|##6 | 10.9M/41.5M [02:25<06:22, 83.9kB/s]
+ 26%|##6 | 10.9M/41.5M [02:25<06:10, 86.6kB/s]
+ 26%|##6 | 10.9M/41.5M [02:25<05:35, 95.5kB/s]
+ 26%|##6 | 10.9M/41.5M [02:25<06:51, 77.8kB/s]
+ 26%|##6 | 11.0M/41.5M [02:26<06:03, 88.0kB/s]
+ 26%|##6 | 11.0M/41.5M [02:26<07:29, 71.2kB/s]
+ 27%|##6 | 11.0M/41.5M [02:26<06:09, 86.4kB/s]
+ 27%|##6 | 11.0M/41.5M [02:26<07:31, 70.8kB/s]
+ 27%|##6 | 11.0M/41.5M [02:27<08:07, 65.5kB/s]
+ 27%|##6 | 11.0M/41.5M [02:27<08:43, 60.9kB/s]
+ 27%|##6 | 11.0M/41.5M [02:27<09:17, 57.3kB/s]
+ 27%|##6 | 11.1M/41.5M [02:27<07:58, 66.7kB/s]
+ 27%|##6 | 11.1M/41.5M [02:27<08:42, 61.1kB/s]
+ 27%|##6 | 11.1M/41.5M [02:28<09:19, 57.0kB/s]
+ 27%|##6 | 11.1M/41.5M [02:28<10:10, 52.2kB/s]
+ 27%|##6 | 11.1M/41.5M [02:28<08:33, 62.0kB/s]
+ 27%|##6 | 11.1M/41.5M [02:28<09:09, 58.0kB/s]
+ 27%|##6 | 11.1M/41.5M [02:28<09:40, 54.9kB/s]
+ 27%|##6 | 11.1M/41.5M [02:29<08:06, 65.4kB/s]
+ 27%|##6 | 11.1M/41.5M [02:29<08:50, 60.0kB/s]
+ 27%|##6 | 11.2M/41.5M [02:29<07:38, 69.4kB/s]
+ 27%|##6 | 11.2M/41.5M [02:29<08:26, 62.7kB/s]
+ 27%|##6 | 11.2M/41.5M [02:29<09:08, 57.9kB/s]
+ 27%|##6 | 11.2M/41.5M [02:29<07:45, 68.2kB/s]
+ 27%|##7 | 11.2M/41.5M [02:30<07:00, 75.5kB/s]
+ 27%|##7 | 11.2M/41.5M [02:30<06:33, 80.7kB/s]
+ 27%|##7 | 11.2M/41.5M [02:30<06:16, 84.3kB/s]
+ 27%|##7 | 11.3M/41.5M [02:30<06:05, 86.8kB/s]
+ 27%|##7 | 11.3M/41.5M [02:30<05:57, 88.6kB/s]
+ 27%|##7 | 11.3M/41.5M [02:31<05:52, 89.8kB/s]
+ 27%|##7 | 11.3M/41.5M [02:31<05:48, 90.7kB/s]
+ 27%|##7 | 11.3M/41.5M [02:31<05:46, 91.3kB/s]
+ 27%|##7 | 11.3M/41.5M [02:31<05:44, 91.7kB/s]
+ 27%|##7 | 11.4M/41.5M [02:31<04:58, 106kB/s]
+ 27%|##7 | 11.4M/41.5M [02:31<05:09, 102kB/s]
+ 27%|##7 | 11.4M/41.5M [02:32<05:18, 99.2kB/s]
+ 28%|##7 | 11.4M/41.5M [02:32<06:08, 85.5kB/s]
+ 28%|##7 | 11.4M/41.5M [02:32<05:19, 98.7kB/s]
+ 28%|##7 | 11.5M/41.5M [02:32<05:24, 97.0kB/s]
+ 28%|##7 | 11.5M/41.5M [02:33<06:57, 75.5kB/s]
+ 28%|##7 | 11.5M/41.5M [02:33<06:36, 79.3kB/s]
+ 28%|##7 | 11.5M/41.5M [02:33<06:20, 82.7kB/s]
+ 28%|##7 | 11.5M/41.5M [02:34<09:21, 56.0kB/s]
+ 28%|##7 | 11.5M/41.5M [02:34<07:14, 72.3kB/s]
+ 28%|##7 | 11.6M/41.5M [02:34<07:10, 72.9kB/s]
+ 28%|##7 | 11.6M/41.5M [02:34<07:55, 66.0kB/s]
+ 28%|##7 | 11.6M/41.5M [02:34<07:16, 71.8kB/s]
+ 28%|##7 | 11.6M/41.5M [02:35<06:48, 76.8kB/s]
+ 28%|##8 | 11.6M/41.5M [02:35<06:51, 76.2kB/s]
+ 28%|##8 | 11.6M/41.5M [02:35<07:08, 73.1kB/s]
+ 28%|##8 | 11.6M/41.5M [02:35<06:38, 78.6kB/s]
+ 28%|##8 | 11.7M/41.5M [02:35<06:18, 82.6kB/s]
+ 28%|##8 | 11.7M/41.5M [02:35<06:05, 85.6kB/s]
+ 28%|##8 | 11.7M/41.5M [02:36<05:56, 87.7kB/s]
+ 28%|##8 | 11.7M/41.5M [02:36<05:03, 103kB/s]
+ 28%|##8 | 11.7M/41.5M [02:36<07:07, 73.0kB/s]
+ 28%|##8 | 11.8M/41.5M [02:36<04:55, 105kB/s]
+ 28%|##8 | 11.8M/41.5M [02:37<05:24, 96.0kB/s]
+ 28%|##8 | 11.8M/41.5M [02:37<05:27, 95.2kB/s]
+ 28%|##8 | 11.8M/41.5M [02:37<06:37, 78.3kB/s]
+ 29%|##8 | 11.8M/41.5M [02:37<05:34, 93.1kB/s]
+ 29%|##8 | 11.8M/41.5M [02:38<08:26, 61.4kB/s]
+ 29%|##8 | 11.9M/41.5M [02:38<06:25, 80.6kB/s]
+ 29%|##8 | 11.9M/41.5M [02:38<06:28, 79.9kB/s]
+ 29%|##8 | 11.9M/41.5M [02:38<06:14, 82.7kB/s]
+ 29%|##8 | 11.9M/41.5M [02:39<06:04, 85.2kB/s]
+ 29%|##8 | 11.9M/41.5M [02:39<07:08, 72.2kB/s]
+ 29%|##8 | 12.0M/41.5M [02:39<07:00, 73.7kB/s]
+ 29%|##8 | 12.0M/41.5M [02:39<06:35, 78.3kB/s]
+ 29%|##8 | 12.0M/41.5M [02:39<06:17, 82.0kB/s]
+ 29%|##8 | 12.0M/41.5M [02:40<07:42, 66.9kB/s]
+ 29%|##8 | 12.0M/41.5M [02:40<05:52, 87.5kB/s]
+ 29%|##9 | 12.0M/41.5M [02:40<07:16, 70.7kB/s]
+ 29%|##9 | 12.1M/41.5M [02:40<06:47, 75.7kB/s]
+ 29%|##9 | 12.1M/41.5M [02:41<06:44, 76.2kB/s]
+ 29%|##9 | 12.1M/41.5M [02:41<07:40, 66.9kB/s]
+ 29%|##9 | 12.1M/41.5M [02:41<07:03, 72.8kB/s]
+ 29%|##9 | 12.1M/41.5M [02:41<07:44, 66.3kB/s]
+ 29%|##9 | 12.1M/41.5M [02:42<07:00, 73.2kB/s]
+ 29%|##9 | 12.1M/41.5M [02:42<06:32, 78.5kB/s]
+ 29%|##9 | 12.1M/41.5M [02:42<09:25, 54.4kB/s]
+ 29%|##9 | 12.2M/41.5M [02:42<05:59, 85.5kB/s]
+ 29%|##9 | 12.2M/41.5M [02:43<08:49, 58.0kB/s]
+ 29%|##9 | 12.2M/41.5M [02:43<06:12, 82.5kB/s]
+ 30%|##9 | 12.2M/41.5M [02:43<06:40, 76.6kB/s]
+ 30%|##9 | 12.3M/41.5M [02:44<07:05, 72.1kB/s]
+ 30%|##9 | 12.3M/41.5M [02:44<08:30, 60.0kB/s]
+ 30%|##9 | 12.3M/41.5M [02:44<07:17, 70.0kB/s]
+ 30%|##9 | 12.3M/41.5M [02:44<06:47, 75.2kB/s]
+ 30%|##9 | 12.3M/41.5M [02:45<07:59, 63.8kB/s]
+ 30%|##9 | 12.3M/41.5M [02:45<08:51, 57.5kB/s]
+ 30%|##9 | 12.3M/41.5M [02:45<09:12, 55.3kB/s]
+ 30%|##9 | 12.4M/41.5M [02:45<07:58, 63.8kB/s]
+ 30%|##9 | 12.4M/41.5M [02:45<08:32, 59.5kB/s]
+ 30%|##9 | 12.4M/41.5M [02:46<07:27, 68.1kB/s]
+ 30%|##9 | 12.4M/41.5M [02:46<06:48, 74.6kB/s]
+ 30%|##9 | 12.4M/41.5M [02:46<07:35, 66.9kB/s]
+ 30%|##9 | 12.4M/41.5M [02:46<06:51, 74.1kB/s]
+ 30%|##9 | 12.4M/41.5M [02:47<08:13, 61.7kB/s]
+ 30%|### | 12.5M/41.5M [02:47<06:40, 76.0kB/s]
+ 30%|### | 12.5M/41.5M [02:47<06:56, 73.1kB/s]
+ 30%|### | 12.5M/41.5M [02:47<07:42, 65.8kB/s]
+ 30%|### | 12.5M/41.5M [02:47<08:50, 57.3kB/s]
+ 30%|### | 12.5M/41.5M [02:48<06:35, 76.8kB/s]
+ 30%|### | 12.5M/41.5M [02:48<06:39, 76.1kB/s]
+ 30%|### | 12.5M/41.5M [02:48<06:55, 73.1kB/s]
+ 30%|### | 12.5M/41.5M [02:48<07:42, 65.6kB/s]
+ 30%|### | 12.6M/41.5M [02:48<06:53, 73.4kB/s]
+ 30%|### | 12.6M/41.5M [02:48<06:23, 79.0kB/s]
+ 30%|### | 12.6M/41.5M [02:49<08:18, 60.8kB/s]
+ 30%|### | 12.6M/41.5M [02:49<06:00, 83.9kB/s]
+ 30%|### | 12.6M/41.5M [02:49<06:35, 76.4kB/s]
+ 30%|### | 12.6M/41.5M [02:49<06:15, 80.5kB/s]
+ 31%|### | 12.7M/41.5M [02:50<06:49, 73.8kB/s]
+ 31%|### | 12.7M/41.5M [02:50<06:24, 78.5kB/s]
+ 31%|### | 12.7M/41.5M [02:50<06:07, 82.3kB/s]
+ 31%|### | 12.7M/41.5M [02:50<06:17, 80.0kB/s]
+ 31%|### | 12.7M/41.5M [02:50<06:26, 78.1kB/s]
+ 31%|### | 12.7M/41.5M [02:51<05:43, 87.9kB/s]
+ 31%|### | 12.8M/41.5M [02:51<06:02, 83.1kB/s]
+ 31%|### | 12.8M/41.5M [02:51<05:50, 85.8kB/s]
+ 31%|### | 12.8M/41.5M [02:51<05:42, 87.8kB/s]
+ 31%|### | 12.8M/41.5M [02:51<05:12, 96.3kB/s]
+ 31%|### | 12.8M/41.5M [02:51<05:15, 95.2kB/s]
+ 31%|### | 12.8M/41.5M [02:52<05:43, 87.4kB/s]
+ 31%|### | 12.9M/41.5M [02:52<04:53, 102kB/s]
+ 31%|###1 | 12.9M/41.5M [02:52<05:21, 93.3kB/s]
+ 31%|###1 | 12.9M/41.5M [02:52<06:52, 72.7kB/s]
+ 31%|###1 | 12.9M/41.5M [02:53<05:37, 88.7kB/s]
+ 31%|###1 | 12.9M/41.5M [02:53<05:34, 89.6kB/s]
+ 31%|###1 | 13.0M/41.5M [02:53<06:54, 72.3kB/s]
+ 31%|###1 | 13.0M/41.5M [02:54<08:18, 60.0kB/s]
+ 31%|###1 | 13.0M/41.5M [02:54<07:29, 66.5kB/s]
+ 31%|###1 | 13.0M/41.5M [02:54<08:01, 62.1kB/s]
+ 31%|###1 | 13.0M/41.5M [02:54<08:32, 58.3kB/s]
+ 31%|###1 | 13.0M/41.5M [02:54<08:25, 59.1kB/s]
+ 31%|###1 | 13.0M/41.5M [02:55<08:57, 55.6kB/s]
+ 31%|###1 | 13.0M/41.5M [02:55<09:23, 52.9kB/s]
+ 31%|###1 | 13.0M/41.5M [02:55<12:34, 39.6kB/s]
+ 31%|###1 | 13.0M/41.5M [02:55<09:28, 52.5kB/s]
+ 31%|###1 | 13.1M/41.5M [02:55<09:45, 50.9kB/s]
+ 32%|###1 | 13.1M/41.5M [02:56<08:33, 58.1kB/s]
+ 32%|###1 | 13.1M/41.5M [02:56<09:01, 55.0kB/s]
+ 32%|###1 | 13.1M/41.5M [02:56<08:45, 56.7kB/s]
+ 32%|###1 | 13.1M/41.5M [02:56<08:25, 58.8kB/s]
+ 32%|###1 | 13.1M/41.5M [02:56<08:23, 59.1kB/s]
+ 32%|###1 | 13.1M/41.5M [02:56<08:16, 60.0kB/s]
+ 32%|###1 | 13.1M/41.5M [02:57<08:22, 59.1kB/s]
+ 32%|###1 | 13.1M/41.5M [02:57<07:05, 69.9kB/s]
+ 32%|###1 | 13.1M/41.5M [02:57<07:06, 69.7kB/s]
+ 32%|###1 | 13.2M/41.5M [02:57<07:05, 69.8kB/s]
+ 32%|###1 | 13.2M/41.5M [02:57<08:57, 55.3kB/s]
+ 32%|###1 | 13.2M/41.5M [02:58<09:19, 53.0kB/s]
+ 32%|###1 | 13.2M/41.5M [02:58<10:33, 46.8kB/s]
+ 32%|###1 | 13.2M/41.5M [02:58<07:25, 66.5kB/s]
+ 32%|###1 | 13.2M/41.5M [02:58<07:59, 61.8kB/s]
+ 32%|###1 | 13.2M/41.5M [02:59<08:31, 58.0kB/s]
+ 32%|###1 | 13.3M/41.5M [02:59<07:21, 67.0kB/s]
+ 32%|###1 | 13.3M/41.5M [02:59<12:25, 39.7kB/s]
+ 32%|###2 | 13.3M/41.5M [03:00<08:15, 59.7kB/s]
+ 32%|###2 | 13.3M/41.5M [03:00<08:40, 56.7kB/s]
+ 32%|###2 | 13.3M/41.5M [03:00<09:04, 54.2kB/s]
+ 32%|###2 | 13.3M/41.5M [03:00<07:40, 64.1kB/s]
+ 32%|###2 | 13.3M/41.5M [03:00<08:17, 59.4kB/s]
+ 32%|###2 | 13.3M/41.5M [03:01<09:10, 53.6kB/s]
+ 32%|###2 | 13.4M/41.5M [03:01<07:48, 62.9kB/s]
+ 32%|###2 | 13.4M/41.5M [03:01<08:22, 58.7kB/s]
+ 32%|###2 | 13.4M/41.5M [03:01<08:51, 55.5kB/s]
+ 32%|###2 | 13.4M/41.5M [03:02<11:34, 42.4kB/s]
+ 32%|###2 | 13.4M/41.5M [03:02<07:55, 62.0kB/s]
+ 32%|###2 | 13.4M/41.5M [03:02<08:23, 58.5kB/s]
+ 32%|###2 | 13.4M/41.5M [03:02<07:20, 66.8kB/s]
+ 32%|###2 | 13.4M/41.5M [03:02<07:57, 61.5kB/s]
+ 32%|###2 | 13.5M/41.5M [03:03<10:46, 45.5kB/s]
+ 33%|###2 | 13.5M/41.5M [03:03<07:37, 64.1kB/s]
+ 33%|###2 | 13.5M/41.5M [03:03<08:07, 60.2kB/s]
+ 33%|###2 | 13.5M/41.5M [03:03<08:36, 56.9kB/s]
+ 33%|###2 | 13.5M/41.5M [03:04<07:24, 66.0kB/s]
+ 33%|###2 | 13.5M/41.5M [03:04<10:12, 47.9kB/s]
+ 33%|###2 | 13.5M/41.5M [03:04<07:04, 69.0kB/s]
+ 33%|###2 | 13.6M/41.5M [03:04<07:41, 63.4kB/s]
+ 33%|###2 | 13.6M/41.5M [03:04<08:16, 58.9kB/s]
+ 33%|###2 | 13.6M/41.5M [03:05<08:47, 55.5kB/s]
+ 33%|###2 | 13.6M/41.5M [03:05<09:12, 53.0kB/s]
+ 33%|###2 | 13.6M/41.5M [03:05<07:34, 64.3kB/s]
+ 33%|###2 | 13.6M/41.5M [03:05<08:14, 59.1kB/s]
+ 33%|###2 | 13.6M/41.5M [03:05<08:47, 55.4kB/s]
+ 33%|###2 | 13.6M/41.5M [03:06<07:19, 66.4kB/s]
+ 33%|###2 | 13.6M/41.5M [03:06<08:03, 60.5kB/s]
+ 33%|###2 | 13.6M/41.5M [03:06<06:56, 70.1kB/s]
+ 33%|###2 | 13.7M/41.5M [03:06<06:19, 76.8kB/s]
+ 33%|###2 | 13.7M/41.5M [03:06<07:44, 62.8kB/s]
+ 33%|###3 | 13.7M/41.5M [03:07<05:56, 81.7kB/s]
+ 33%|###3 | 13.7M/41.5M [03:07<05:44, 84.6kB/s]
+ 33%|###3 | 13.7M/41.5M [03:07<05:35, 86.8kB/s]
+ 33%|###3 | 13.8M/41.5M [03:07<05:28, 88.5kB/s]
+ 33%|###3 | 13.8M/41.5M [03:07<05:24, 89.7kB/s]
+ 33%|###3 | 13.8M/41.5M [03:07<05:20, 90.6kB/s]
+ 33%|###3 | 13.8M/41.5M [03:08<05:18, 91.2kB/s]
+ 33%|###3 | 13.8M/41.5M [03:08<05:16, 91.7kB/s]
+ 33%|###3 | 13.8M/41.5M [03:08<05:15, 92.0kB/s]
+ 33%|###3 | 13.8M/41.5M [03:08<05:14, 92.2kB/s]
+ 33%|###3 | 13.9M/41.5M [03:08<05:13, 92.4kB/s]
+ 33%|###3 | 13.9M/41.5M [03:09<06:46, 71.2kB/s]
+ 33%|###3 | 13.9M/41.5M [03:09<05:29, 87.9kB/s]
+ 34%|###3 | 13.9M/41.5M [03:09<05:24, 89.2kB/s]
+ 34%|###3 | 13.9M/41.5M [03:09<05:20, 90.1kB/s]
+ 34%|###3 | 13.9M/41.5M [03:09<05:17, 90.9kB/s]
+ 34%|###3 | 14.0M/41.5M [03:10<05:15, 91.4kB/s]
+ 34%|###3 | 14.0M/41.5M [03:10<05:35, 85.9kB/s]
+ 34%|###3 | 14.0M/41.5M [03:10<05:07, 93.9kB/s]
+ 34%|###3 | 14.0M/41.5M [03:10<05:08, 93.6kB/s]
+ 34%|###3 | 14.0M/41.5M [03:10<05:08, 93.3kB/s]
+ 34%|###3 | 14.0M/41.5M [03:10<05:09, 93.1kB/s]
+ 34%|###3 | 14.1M/41.5M [03:11<05:09, 93.0kB/s]
+ 34%|###3 | 14.1M/41.5M [03:11<04:29, 107kB/s]
+ 34%|###3 | 14.1M/41.5M [03:11<04:39, 103kB/s]
+ 34%|###4 | 14.1M/41.5M [03:11<04:48, 99.7kB/s]
+ 34%|###4 | 14.1M/41.5M [03:11<04:17, 111kB/s]
+ 34%|###4 | 14.1M/41.5M [03:12<04:30, 106kB/s]
+ 34%|###4 | 14.2M/41.5M [03:12<05:00, 95.3kB/s]
+ 34%|###4 | 14.2M/41.5M [03:12<04:43, 101kB/s]
+ 34%|###4 | 14.2M/41.5M [03:12<04:50, 98.6kB/s]
+ 34%|###4 | 14.2M/41.5M [03:12<06:25, 74.2kB/s]
+ 34%|###4 | 14.2M/41.5M [03:13<07:54, 60.3kB/s]
+ 34%|###4 | 14.3M/41.5M [03:13<05:38, 84.3kB/s]
+ 34%|###4 | 14.3M/41.5M [03:13<05:48, 81.9kB/s]
+ 34%|###4 | 14.3M/41.5M [03:14<06:36, 72.0kB/s]
+ 34%|###4 | 14.3M/41.5M [03:14<08:54, 53.3kB/s]
+ 35%|###4 | 14.3M/41.5M [03:14<06:10, 76.8kB/s]
+ 35%|###4 | 14.4M/41.5M [03:15<08:36, 55.1kB/s]
+ 35%|###4 | 14.4M/41.5M [03:15<06:36, 71.7kB/s]
+ 35%|###4 | 14.4M/41.5M [03:15<06:53, 68.8kB/s]
+ 35%|###4 | 14.4M/41.5M [03:16<08:39, 54.7kB/s]
+ 35%|###4 | 14.4M/41.5M [03:16<06:30, 72.6kB/s]
+ 35%|###4 | 14.5M/41.5M [03:16<07:26, 63.5kB/s]
+ 35%|###4 | 14.5M/41.5M [03:17<06:48, 69.3kB/s]
+ 35%|###4 | 14.5M/41.5M [03:17<06:20, 74.5kB/s]
+ 35%|###4 | 14.5M/41.5M [03:17<05:58, 78.9kB/s]
+ 35%|###4 | 14.5M/41.5M [03:17<07:10, 65.7kB/s]
+ 35%|###5 | 14.5M/41.5M [03:17<06:33, 71.8kB/s]
+ 35%|###5 | 14.5M/41.5M [03:18<06:07, 76.9kB/s]
+ 35%|###5 | 14.6M/41.5M [03:18<05:48, 81.0kB/s]
+ 35%|###5 | 14.6M/41.5M [03:18<05:35, 84.1kB/s]
+ 35%|###5 | 14.6M/41.5M [03:18<05:25, 86.5kB/s]
+ 35%|###5 | 14.6M/41.5M [03:18<06:50, 68.7kB/s]
+ 35%|###5 | 14.6M/41.5M [03:19<04:51, 96.7kB/s]
+ 35%|###5 | 14.7M/41.5M [03:19<04:54, 95.7kB/s]
+ 35%|###5 | 14.7M/41.5M [03:19<04:56, 94.9kB/s]
+ 35%|###5 | 14.7M/41.5M [03:19<06:20, 73.8kB/s]
+ 35%|###5 | 14.7M/41.5M [03:20<06:28, 72.2kB/s]
+ 36%|###5 | 14.7M/41.5M [03:20<05:24, 86.5kB/s]
+ 36%|###5 | 14.8M/41.5M [03:20<05:18, 88.0kB/s]
+ 36%|###5 | 14.8M/41.5M [03:20<05:14, 89.2kB/s]
+ 36%|###5 | 14.8M/41.5M [03:21<06:32, 71.3kB/s]
+ 36%|###5 | 14.8M/41.5M [03:21<07:07, 65.5kB/s]
+ 36%|###5 | 14.8M/41.5M [03:21<08:03, 57.9kB/s]
+ 36%|###5 | 14.8M/41.5M [03:22<08:25, 55.3kB/s]
+ 36%|###5 | 14.9M/41.5M [03:22<07:31, 61.9kB/s]
+ 36%|###5 | 14.9M/41.5M [03:22<07:51, 59.1kB/s]
+ 36%|###5 | 14.9M/41.5M [03:22<08:13, 56.6kB/s]
+ 36%|###5 | 14.9M/41.5M [03:23<07:11, 64.6kB/s]
+ 36%|###5 | 14.9M/41.5M [03:23<07:42, 60.2kB/s]
+ 36%|###5 | 14.9M/41.5M [03:23<08:11, 56.7kB/s]
+ 36%|###5 | 14.9M/41.5M [03:23<06:59, 66.3kB/s]
+ 36%|###5 | 14.9M/41.5M [03:23<07:37, 60.8kB/s]
+ 36%|###6 | 14.9M/41.5M [03:23<08:10, 56.7kB/s]
+ 36%|###6 | 15.0M/41.5M [03:24<06:54, 67.1kB/s]
+ 36%|###6 | 15.0M/41.5M [03:24<07:35, 61.1kB/s]
+ 36%|###6 | 15.0M/41.5M [03:24<06:35, 70.4kB/s]
+ 36%|###6 | 15.0M/41.5M [03:24<07:18, 63.3kB/s]
+ 36%|###6 | 15.0M/41.5M [03:24<06:25, 72.1kB/s]
+ 36%|###6 | 15.0M/41.5M [03:25<05:54, 78.2kB/s]
+ 36%|###6 | 15.0M/41.5M [03:25<05:36, 82.6kB/s]
+ 36%|###6 | 15.0M/41.5M [03:25<07:01, 65.8kB/s]
+ 36%|###6 | 15.1M/41.5M [03:25<09:27, 48.9kB/s]
+ 36%|###6 | 15.1M/41.5M [03:26<05:50, 79.0kB/s]
+ 36%|###6 | 15.1M/41.5M [03:26<05:36, 82.3kB/s]
+ 36%|###6 | 15.1M/41.5M [03:26<08:11, 56.2kB/s]
+ 37%|###6 | 15.1M/41.5M [03:26<05:39, 81.3kB/s]
+ 37%|###6 | 15.2M/41.5M [03:27<05:29, 83.8kB/s]
+ 37%|###6 | 15.2M/41.5M [03:27<08:11, 56.1kB/s]
+ 37%|###6 | 15.2M/41.5M [03:27<06:10, 74.3kB/s]
+ 37%|###6 | 15.2M/41.5M [03:28<07:06, 64.5kB/s]
+ 37%|###6 | 15.2M/41.5M [03:28<06:32, 70.2kB/s]
+ 37%|###6 | 15.2M/41.5M [03:28<07:27, 61.5kB/s]
+ 37%|###6 | 15.3M/41.5M [03:28<06:44, 68.0kB/s]
+ 37%|###6 | 15.3M/41.5M [03:29<06:13, 73.6kB/s]
+ 37%|###6 | 15.3M/41.5M [03:29<05:50, 78.3kB/s]
+ 37%|###6 | 15.3M/41.5M [03:29<05:34, 82.1kB/s]
+ 37%|###6 | 15.3M/41.5M [03:29<05:22, 85.0kB/s]
+ 37%|###6 | 15.3M/41.5M [03:30<08:10, 55.9kB/s]
+ 37%|###7 | 15.4M/41.5M [03:30<07:33, 60.4kB/s]
+ 37%|###7 | 15.4M/41.5M [03:30<06:01, 75.7kB/s]
+ 37%|###7 | 15.4M/41.5M [03:31<11:02, 41.3kB/s]
+ 37%|###7 | 15.4M/41.5M [03:31<08:30, 53.5kB/s]
+ 37%|###7 | 15.4M/41.5M [03:32<10:03, 45.2kB/s]
+ 37%|###7 | 15.5M/41.5M [03:32<10:01, 45.4kB/s]
+ 37%|###7 | 15.5M/41.5M [03:32<09:58, 45.6kB/s]
+ 37%|###7 | 15.5M/41.5M [03:32<09:56, 45.7kB/s]
+ 37%|###7 | 15.5M/41.5M [03:33<09:54, 45.9kB/s]
+ 37%|###7 | 15.5M/41.5M [03:33<09:52, 46.0kB/s]
+ 37%|###7 | 15.5M/41.5M [03:33<09:51, 46.1kB/s]
+ 37%|###7 | 15.5M/41.5M [03:33<12:28, 36.4kB/s]
+ 37%|###7 | 15.5M/41.5M [03:34<11:18, 40.2kB/s]
+ 37%|###7 | 15.5M/41.5M [03:34<10:56, 41.5kB/s]
+ 37%|###7 | 15.5M/41.5M [03:34<10:38, 42.6kB/s]
+ 37%|###7 | 15.5M/41.5M [03:34<08:13, 55.1kB/s]
+ 37%|###7 | 15.6M/41.5M [03:34<10:50, 41.8kB/s]
+ 38%|###7 | 15.6M/41.5M [03:35<08:54, 50.8kB/s]
+ 38%|###7 | 15.6M/41.5M [03:35<09:05, 49.8kB/s]
+ 38%|###7 | 15.6M/41.5M [03:35<08:43, 51.9kB/s]
+ 38%|###7 | 15.6M/41.5M [03:35<08:58, 50.4kB/s]
+ 38%|###7 | 15.6M/41.5M [03:35<09:11, 49.3kB/s]
+ 38%|###7 | 15.6M/41.5M [03:36<09:20, 48.4kB/s]
+ 38%|###7 | 15.6M/41.5M [03:36<07:22, 61.3kB/s]
+ 38%|###7 | 15.6M/41.5M [03:36<07:56, 56.9kB/s]
+ 38%|###7 | 15.6M/41.5M [03:36<06:41, 67.5kB/s]
+ 38%|###7 | 15.7M/41.5M [03:36<07:22, 61.3kB/s]
+ 38%|###7 | 15.7M/41.5M [03:36<06:23, 70.7kB/s]
+ 38%|###7 | 15.7M/41.5M [03:37<07:06, 63.4kB/s]
+ 38%|###7 | 15.7M/41.5M [03:37<08:06, 55.6kB/s]
+ 38%|###7 | 15.7M/41.5M [03:37<06:15, 72.0kB/s]
+ 38%|###7 | 15.7M/41.5M [03:37<06:28, 69.6kB/s]
+ 38%|###7 | 15.7M/41.5M [03:38<07:33, 59.6kB/s]
+ 38%|###7 | 15.8M/41.5M [03:38<06:22, 70.6kB/s]
+ 38%|###8 | 15.8M/41.5M [03:38<06:32, 68.7kB/s]
+ 38%|###8 | 15.8M/41.5M [03:38<05:59, 75.0kB/s]
+ 38%|###8 | 15.8M/41.5M [03:39<08:29, 52.9kB/s]
+ 38%|###8 | 15.8M/41.5M [03:39<06:08, 73.0kB/s]
+ 38%|###8 | 15.8M/41.5M [03:39<06:29, 69.0kB/s]
+ 38%|###8 | 15.8M/41.5M [03:39<06:34, 68.2kB/s]
+ 38%|###8 | 15.9M/41.5M [03:39<05:59, 74.8kB/s]
+ 38%|###8 | 15.9M/41.5M [03:40<08:32, 52.4kB/s]
+ 38%|###8 | 15.9M/41.5M [03:40<06:07, 72.9kB/s]
+ 38%|###8 | 15.9M/41.5M [03:40<05:45, 77.7kB/s]
+ 38%|###8 | 15.9M/41.5M [03:40<06:34, 68.0kB/s]
+ 38%|###8 | 15.9M/41.5M [03:41<06:02, 73.9kB/s]
+ 38%|###8 | 15.9M/41.5M [03:41<08:22, 53.3kB/s]
+ 38%|###8 | 16.0M/41.5M [03:41<06:08, 72.6kB/s]
+ 39%|###8 | 16.0M/41.5M [03:42<08:32, 52.2kB/s]
+ 39%|###8 | 16.0M/41.5M [03:42<07:12, 61.8kB/s]
+ 39%|###8 | 16.0M/41.5M [03:42<07:54, 56.3kB/s]
+ 39%|###8 | 16.0M/41.5M [03:42<08:12, 54.3kB/s]
+ 39%|###8 | 16.0M/41.5M [03:42<07:04, 62.9kB/s]
+ 39%|###8 | 16.0M/41.5M [03:43<09:23, 47.4kB/s]
+ 39%|###8 | 16.1M/41.5M [03:43<07:45, 57.3kB/s]
+ 39%|###8 | 16.1M/41.5M [03:43<08:07, 54.7kB/s]
+ 39%|###8 | 16.1M/41.5M [03:43<08:39, 51.3kB/s]
+ 39%|###8 | 16.1M/41.5M [03:44<07:19, 60.6kB/s]
+ 39%|###8 | 16.1M/41.5M [03:44<07:45, 57.2kB/s]
+ 39%|###8 | 16.1M/41.5M [03:44<08:09, 54.4kB/s]
+ 39%|###8 | 16.1M/41.5M [03:44<06:50, 64.7kB/s]
+ 39%|###8 | 16.1M/41.5M [03:45<15:59, 27.7kB/s]
+ 39%|###9 | 16.2M/41.5M [03:45<06:29, 68.0kB/s]
+ 39%|###9 | 16.2M/41.5M [03:45<06:13, 70.9kB/s]
+ 39%|###9 | 16.2M/41.5M [03:46<08:06, 54.5kB/s]
+ 39%|###9 | 16.2M/41.5M [03:46<06:22, 69.3kB/s]
+ 39%|###9 | 16.3M/41.5M [03:47<07:09, 61.6kB/s]
+ 39%|###9 | 16.3M/41.5M [03:47<07:46, 56.7kB/s]
+ 39%|###9 | 16.3M/41.5M [03:47<07:15, 60.6kB/s]
+ 39%|###9 | 16.3M/41.5M [03:47<07:37, 57.7kB/s]
+ 39%|###9 | 16.3M/41.5M [03:47<07:31, 58.5kB/s]
+ 39%|###9 | 16.3M/41.5M [03:48<07:56, 55.4kB/s]
+ 39%|###9 | 16.3M/41.5M [03:48<08:50, 49.7kB/s]
+ 39%|###9 | 16.3M/41.5M [03:48<08:25, 52.2kB/s]
+ 39%|###9 | 16.3M/41.5M [03:48<07:22, 59.6kB/s]
+ 39%|###9 | 16.4M/41.5M [03:48<07:18, 60.1kB/s]
+ 39%|###9 | 16.4M/41.5M [03:48<07:50, 56.0kB/s]
+ 39%|###9 | 16.4M/41.5M [03:49<08:15, 53.1kB/s]
+ 39%|###9 | 16.4M/41.5M [03:49<06:45, 65.0kB/s]
+ 40%|###9 | 16.4M/41.5M [03:49<07:23, 59.4kB/s]
+ 40%|###9 | 16.4M/41.5M [03:49<06:44, 65.0kB/s]
+ 40%|###9 | 16.4M/41.5M [03:49<06:52, 63.8kB/s]
+ 40%|###9 | 16.4M/41.5M [03:50<06:01, 72.6kB/s]
+ 40%|###9 | 16.4M/41.5M [03:50<05:56, 73.6kB/s]
+ 40%|###9 | 16.5M/41.5M [03:50<05:31, 79.2kB/s]
+ 40%|###9 | 16.5M/41.5M [03:50<05:51, 74.5kB/s]
+ 40%|###9 | 16.5M/41.5M [03:50<07:07, 61.3kB/s]
+ 40%|###9 | 16.5M/41.5M [03:51<04:59, 87.6kB/s]
+ 40%|###9 | 16.5M/41.5M [03:51<04:54, 88.9kB/s]
+ 40%|###9 | 16.5M/41.5M [03:51<05:12, 83.8kB/s]
+ 40%|###9 | 16.6M/41.5M [03:51<06:01, 72.4kB/s]
+ 40%|###9 | 16.6M/41.5M [03:51<06:34, 66.2kB/s]
+ 40%|###9 | 16.6M/41.5M [03:52<05:58, 72.9kB/s]
+ 40%|#### | 16.6M/41.5M [03:52<05:33, 78.2kB/s]
+ 40%|#### | 16.6M/41.5M [03:52<05:17, 82.2kB/s]
+ 40%|#### | 16.6M/41.5M [03:52<05:05, 85.2kB/s]
+ 40%|#### | 16.6M/41.5M [03:53<06:24, 67.8kB/s]
+ 40%|#### | 16.7M/41.5M [03:53<05:52, 73.9kB/s]
+ 40%|#### | 16.7M/41.5M [03:53<05:30, 78.8kB/s]
+ 40%|#### | 16.7M/41.5M [03:53<05:15, 82.5kB/s]
+ 40%|#### | 16.7M/41.5M [03:53<06:28, 66.8kB/s]
+ 40%|#### | 16.7M/41.5M [03:54<05:09, 83.8kB/s]
+ 40%|#### | 16.8M/41.5M [03:54<05:04, 85.1kB/s]
+ 40%|#### | 16.8M/41.5M [03:54<05:13, 82.6kB/s]
+ 40%|#### | 16.8M/41.5M [03:54<05:03, 85.3kB/s]
+ 40%|#### | 16.8M/41.5M [03:54<04:39, 92.6kB/s]
+ 41%|#### | 16.8M/41.5M [03:55<04:56, 87.3kB/s]
+ 41%|#### | 16.8M/41.5M [03:55<04:51, 88.9kB/s]
+ 41%|#### | 16.8M/41.5M [03:55<04:47, 90.0kB/s]
+ 41%|#### | 16.9M/41.5M [03:55<04:44, 90.8kB/s]
+ 41%|#### | 16.9M/41.5M [03:55<05:44, 74.9kB/s]
+ 41%|#### | 16.9M/41.5M [03:56<04:45, 90.3kB/s]
+ 41%|#### | 16.9M/41.5M [03:56<04:43, 91.0kB/s]
+ 41%|#### | 16.9M/41.5M [03:56<05:17, 81.1kB/s]
+ 41%|#### | 16.9M/41.5M [03:56<07:24, 57.9kB/s]
+ 41%|#### | 17.0M/41.5M [03:57<05:31, 77.6kB/s]
+ 41%|#### | 17.0M/41.5M [03:57<06:30, 65.8kB/s]
+ 41%|#### | 17.0M/41.5M [03:57<05:58, 71.5kB/s]
+ 41%|####1 | 17.0M/41.5M [03:57<05:35, 76.5kB/s]
+ 41%|####1 | 17.0M/41.5M [03:58<05:18, 80.5kB/s]
+ 41%|####1 | 17.0M/41.5M [03:58<05:06, 83.7kB/s]
+ 41%|####1 | 17.1M/41.5M [03:58<04:57, 86.2kB/s]
+ 41%|####1 | 17.1M/41.5M [03:58<06:12, 68.8kB/s]
+ 41%|####1 | 17.1M/41.5M [03:58<05:43, 74.5kB/s]
+ 41%|####1 | 17.1M/41.5M [03:59<05:23, 79.1kB/s]
+ 41%|####1 | 17.1M/41.5M [03:59<05:08, 82.7kB/s]
+ 41%|####1 | 17.1M/41.5M [03:59<04:58, 85.5kB/s]
+ 41%|####1 | 17.2M/41.5M [03:59<04:13, 101kB/s]
+ 41%|####1 | 17.2M/41.5M [03:59<04:37, 92.0kB/s]
+ 41%|####1 | 17.2M/41.5M [03:59<04:18, 98.6kB/s]
+ 41%|####1 | 17.2M/41.5M [04:00<06:50, 62.0kB/s]
+ 42%|####1 | 17.2M/41.5M [04:00<05:31, 76.7kB/s]
+ 42%|####1 | 17.2M/41.5M [04:00<05:16, 80.3kB/s]
+ 42%|####1 | 17.3M/41.5M [04:01<06:19, 66.9kB/s]
+ 42%|####1 | 17.3M/41.5M [04:01<05:49, 72.6kB/s]
+ 42%|####1 | 17.3M/41.5M [04:01<05:27, 77.4kB/s]
+ 42%|####1 | 17.3M/41.5M [04:01<06:31, 64.8kB/s]
+ 42%|####1 | 17.3M/41.5M [04:02<07:17, 57.9kB/s]
+ 42%|####1 | 17.3M/41.5M [04:02<07:59, 52.8kB/s]
+ 42%|####1 | 17.4M/41.5M [04:02<07:58, 52.9kB/s]
+ 42%|####1 | 17.4M/41.5M [04:02<08:11, 51.5kB/s]
+ 42%|####1 | 17.4M/41.5M [04:03<10:19, 40.8kB/s]
+ 42%|####1 | 17.4M/41.5M [04:03<14:17, 29.5kB/s]
+ 42%|####1 | 17.4M/41.5M [04:04<14:44, 28.6kB/s]
+ 42%|####1 | 17.4M/41.5M [04:04<12:01, 35.0kB/s]
+ 42%|####1 | 17.4M/41.5M [04:04<11:24, 36.9kB/s]
+ 42%|####1 | 17.4M/41.5M [04:05<10:52, 38.7kB/s]
+ 42%|####2 | 17.4M/41.5M [04:05<10:25, 40.3kB/s]
+ 42%|####2 | 17.4M/41.5M [04:05<10:04, 41.7kB/s]
+ 42%|####2 | 17.4M/41.5M [04:05<12:14, 34.3kB/s]
+ 42%|####2 | 17.5M/41.5M [04:05<11:21, 37.0kB/s]
+ 42%|####2 | 17.5M/41.5M [04:06<10:41, 39.3kB/s]
+ 42%|####2 | 17.5M/41.5M [04:06<10:13, 41.1kB/s]
+ 42%|####2 | 17.5M/41.5M [04:06<09:52, 42.5kB/s]
+ 42%|####2 | 17.5M/41.5M [04:06<09:37, 43.6kB/s]
+ 42%|####2 | 17.5M/41.5M [04:07<12:07, 34.6kB/s]
+ 42%|####2 | 17.5M/41.5M [04:07<11:12, 37.4kB/s]
+ 42%|####2 | 17.5M/41.5M [04:07<13:15, 31.6kB/s]
+ 42%|####2 | 17.5M/41.5M [04:07<09:13, 45.4kB/s]
+ 42%|####2 | 17.5M/41.5M [04:08<09:08, 45.8kB/s]
+ 42%|####2 | 17.6M/41.5M [04:08<07:27, 56.0kB/s]
+ 42%|####2 | 17.6M/41.5M [04:08<07:46, 53.7kB/s]
+ 42%|####2 | 17.6M/41.5M [04:08<08:03, 51.9kB/s]
+ 42%|####2 | 17.6M/41.5M [04:08<08:17, 50.4kB/s]
+ 42%|####2 | 17.6M/41.5M [04:08<08:28, 49.3kB/s]
+ 42%|####2 | 17.6M/41.5M [04:09<06:46, 61.6kB/s]
+ 42%|####2 | 17.6M/41.5M [04:09<07:17, 57.2kB/s]
+ 42%|####2 | 17.6M/41.5M [04:09<09:57, 41.9kB/s]
+ 42%|####2 | 17.6M/41.5M [04:09<09:42, 43.0kB/s]
+ 42%|####2 | 17.6M/41.5M [04:10<09:30, 43.8kB/s]
+ 43%|####2 | 17.6M/41.5M [04:10<09:21, 44.5kB/s]
+ 43%|####2 | 17.6M/41.5M [04:10<11:49, 35.2kB/s]
+ 43%|####2 | 17.7M/41.5M [04:10<08:31, 48.9kB/s]
+ 43%|####2 | 17.7M/41.5M [04:11<10:48, 38.5kB/s]
+ 43%|####2 | 17.7M/41.5M [04:11<08:13, 50.6kB/s]
+ 43%|####2 | 17.7M/41.5M [04:11<10:25, 39.9kB/s]
+ 43%|####2 | 17.7M/41.5M [04:11<09:50, 42.2kB/s]
+ 43%|####2 | 17.7M/41.5M [04:12<09:38, 43.1kB/s]
+ 43%|####2 | 17.7M/41.5M [04:12<11:36, 35.8kB/s]
+ 43%|####2 | 17.7M/41.5M [04:12<10:56, 38.0kB/s]
+ 43%|####2 | 17.7M/41.5M [04:12<10:24, 39.9kB/s]
+ 43%|####2 | 17.8M/41.5M [04:13<09:59, 41.5kB/s]
+ 43%|####2 | 17.8M/41.5M [04:13<09:41, 42.8kB/s]
+ 43%|####2 | 17.8M/41.5M [04:13<09:28, 43.7kB/s]
+ 43%|####2 | 17.8M/41.5M [04:13<09:19, 44.5kB/s]
+ 43%|####2 | 17.8M/41.5M [04:13<09:12, 45.0kB/s]
+ 43%|####2 | 17.8M/41.5M [04:14<11:45, 35.2kB/s]
+ 43%|####2 | 17.8M/41.5M [04:14<10:54, 37.9kB/s]
+ 43%|####2 | 17.8M/41.5M [04:14<08:27, 49.0kB/s]
+ 43%|####2 | 17.8M/41.5M [04:14<08:33, 48.3kB/s]
+ 43%|####2 | 17.8M/41.5M [04:15<10:58, 37.7kB/s]
+ 43%|####2 | 17.8M/41.5M [04:15<09:49, 42.1kB/s]
+ 43%|####3 | 17.8M/41.5M [04:15<09:33, 43.2kB/s]
+ 43%|####3 | 17.9M/41.5M [04:15<10:49, 38.2kB/s]
+ 43%|####3 | 17.9M/41.5M [04:15<10:15, 40.2kB/s]
+ 43%|####3 | 17.9M/41.5M [04:15<09:51, 41.8kB/s]
+ 43%|####3 | 17.9M/41.5M [04:16<09:34, 43.1kB/s]
+ 43%|####3 | 17.9M/41.5M [04:16<09:22, 44.0kB/s]
+ 43%|####3 | 17.9M/41.5M [04:16<09:13, 44.7kB/s]
+ 43%|####3 | 17.9M/41.5M [04:16<07:54, 52.2kB/s]
+ 43%|####3 | 17.9M/41.5M [04:16<07:12, 57.2kB/s]
+ 43%|####3 | 17.9M/41.5M [04:17<07:38, 54.0kB/s]
+ 43%|####3 | 17.9M/41.5M [04:17<06:16, 65.7kB/s]
+ 43%|####3 | 17.9M/41.5M [04:17<06:52, 59.9kB/s]
+ 43%|####3 | 18.0M/41.5M [04:17<05:53, 69.8kB/s]
+ 43%|####3 | 18.0M/41.5M [04:17<06:58, 58.9kB/s]
+ 43%|####3 | 18.0M/41.5M [04:18<05:12, 78.9kB/s]
+ 43%|####3 | 18.0M/41.5M [04:18<04:58, 82.5kB/s]
+ 43%|####3 | 18.0M/41.5M [04:18<04:48, 85.3kB/s]
+ 43%|####3 | 18.0M/41.5M [04:18<06:00, 68.3kB/s]
+ 44%|####3 | 18.1M/41.5M [04:18<05:31, 74.0kB/s]
+ 44%|####3 | 18.1M/41.5M [04:19<05:11, 78.7kB/s]
+ 44%|####3 | 18.1M/41.5M [04:19<04:57, 82.4kB/s]
+ 44%|####3 | 18.1M/41.5M [04:19<04:47, 85.3kB/s]
+ 44%|####3 | 18.1M/41.5M [04:19<04:40, 87.4kB/s]
+ 44%|####3 | 18.1M/41.5M [04:20<05:54, 69.1kB/s]
+ 44%|####3 | 18.2M/41.5M [04:20<05:00, 81.4kB/s]
+ 44%|####3 | 18.2M/41.5M [04:20<05:05, 80.1kB/s]
+ 44%|####3 | 18.2M/41.5M [04:20<04:53, 83.2kB/s]
+ 44%|####3 | 18.2M/41.5M [04:20<04:44, 85.8kB/s]
+ 44%|####3 | 18.2M/41.5M [04:20<04:38, 87.7kB/s]
+ 44%|####3 | 18.2M/41.5M [04:21<04:33, 89.1kB/s]
+ 44%|####4 | 18.3M/41.5M [04:21<04:30, 90.2kB/s]
+ 44%|####4 | 18.3M/41.5M [04:21<04:27, 90.9kB/s]
+ 44%|####4 | 18.3M/41.5M [04:21<04:25, 91.5kB/s]
+ 44%|####4 | 18.3M/41.5M [04:21<04:24, 91.8kB/s]
+ 44%|####4 | 18.3M/41.5M [04:22<04:23, 92.1kB/s]
+ 44%|####4 | 18.3M/41.5M [04:22<04:06, 98.5kB/s]
+ 44%|####4 | 18.4M/41.5M [04:22<04:10, 96.7kB/s]
+ 44%|####4 | 18.4M/41.5M [04:22<04:13, 95.5kB/s]
+ 44%|####4 | 18.4M/41.5M [04:22<03:42, 109kB/s]
+ 44%|####4 | 18.4M/41.5M [04:22<03:38, 111kB/s]
+ 44%|####4 | 18.4M/41.5M [04:23<03:22, 119kB/s]
+ 44%|####4 | 18.5M/41.5M [04:23<03:23, 119kB/s]
+ 45%|####4 | 18.5M/41.5M [04:23<03:13, 125kB/s]
+ 45%|####4 | 18.5M/41.5M [04:23<03:18, 122kB/s]
+ 45%|####4 | 18.5M/41.5M [04:23<02:58, 135kB/s]
+ 45%|####4 | 18.5M/41.5M [04:23<02:56, 136kB/s]
+ 45%|####4 | 18.6M/41.5M [04:24<02:55, 137kB/s]
+ 45%|####4 | 18.6M/41.5M [04:24<02:48, 143kB/s]
+ 45%|####4 | 18.6M/41.5M [04:24<02:38, 151kB/s]
+ 45%|####4 | 18.6M/41.5M [04:24<03:32, 113kB/s]
+ 45%|####5 | 18.7M/41.5M [04:25<03:06, 128kB/s]
+ 45%|####5 | 18.7M/41.5M [04:25<02:52, 139kB/s]
+ 45%|####5 | 18.7M/41.5M [04:25<03:09, 126kB/s]
+ 45%|####5 | 18.8M/41.5M [04:25<03:03, 130kB/s]
+ 45%|####5 | 18.8M/41.5M [04:25<03:19, 119kB/s]
+ 45%|####5 | 18.8M/41.5M [04:26<04:35, 86.5kB/s]
+ 45%|####5 | 18.8M/41.5M [04:26<05:35, 70.9kB/s]
+ 45%|####5 | 18.8M/41.5M [04:26<04:50, 81.7kB/s]
+ 45%|####5 | 18.8M/41.5M [04:27<07:56, 49.9kB/s]
+ 45%|####5 | 18.9M/41.5M [04:27<09:22, 42.2kB/s]
+ 45%|####5 | 18.9M/41.5M [04:28<07:00, 56.4kB/s]
+ 46%|####5 | 18.9M/41.5M [04:28<07:23, 53.4kB/s]
+ 46%|####5 | 18.9M/41.5M [04:28<06:32, 60.3kB/s]
+ 46%|####5 | 18.9M/41.5M [04:28<06:51, 57.5kB/s]
+ 46%|####5 | 18.9M/41.5M [04:28<07:09, 55.0kB/s]
+ 46%|####5 | 18.9M/41.5M [04:29<06:08, 64.1kB/s]
+ 46%|####5 | 19.0M/41.5M [04:29<08:19, 47.3kB/s]
+ 46%|####5 | 19.0M/41.5M [04:29<07:01, 56.1kB/s]
+ 46%|####5 | 19.0M/41.5M [04:30<07:17, 54.0kB/s]
+ 46%|####5 | 19.0M/41.5M [04:30<06:13, 63.1kB/s]
+ 46%|####5 | 19.0M/41.5M [04:30<08:41, 45.2kB/s]
+ 46%|####5 | 19.0M/41.5M [04:30<08:38, 45.4kB/s]
+ 46%|####5 | 19.0M/41.5M [04:31<10:38, 36.9kB/s]
+ 46%|####5 | 19.0M/41.5M [04:31<12:14, 32.1kB/s]
+ 46%|####5 | 19.0M/41.5M [04:31<11:13, 35.0kB/s]
+ 46%|####5 | 19.0M/41.5M [04:32<15:46, 24.9kB/s]
+ 46%|####5 | 19.0M/41.5M [04:32<18:29, 21.2kB/s]
+ 46%|####5 | 19.1M/41.5M [04:33<15:52, 24.7kB/s]
+ 46%|####5 | 19.1M/41.5M [04:33<12:32, 31.2kB/s]
+ 46%|####6 | 19.1M/41.5M [04:34<14:13, 27.5kB/s]
+ 46%|####6 | 19.1M/41.5M [04:34<14:02, 27.9kB/s]
+ 46%|####6 | 19.1M/41.5M [04:34<14:45, 26.5kB/s]
+ 46%|####6 | 19.1M/41.5M [04:34<13:58, 28.0kB/s]
+ 46%|####6 | 19.1M/41.5M [04:35<14:23, 27.2kB/s]
+ 46%|####6 | 19.1M/41.5M [04:35<15:28, 25.3kB/s]
+ 46%|####6 | 19.1M/41.5M [04:35<13:03, 29.9kB/s]
+ 46%|####6 | 19.1M/41.5M [04:35<12:05, 32.3kB/s]
+ 46%|####6 | 19.1M/41.5M [04:36<13:28, 29.0kB/s]
+ 46%|####6 | 19.2M/41.5M [04:36<11:58, 32.6kB/s]
+ 46%|####6 | 19.2M/41.5M [04:36<13:24, 29.1kB/s]
+ 46%|####6 | 19.2M/41.5M [04:37<11:55, 32.7kB/s]
+ 46%|####6 | 19.2M/41.5M [04:37<13:22, 29.1kB/s]
+ 46%|####6 | 19.2M/41.5M [04:37<10:16, 37.9kB/s]
+ 46%|####6 | 19.2M/41.5M [04:37<10:40, 36.5kB/s]
+ 46%|####6 | 19.2M/41.5M [04:38<10:04, 38.6kB/s]
+ 46%|####6 | 19.2M/41.5M [04:38<09:37, 40.5kB/s]
+ 46%|####6 | 19.2M/41.5M [04:38<09:16, 42.0kB/s]
+ 46%|####6 | 19.2M/41.5M [04:38<11:24, 34.1kB/s]
+ 46%|####6 | 19.2M/41.5M [04:39<10:02, 38.7kB/s]
+ 46%|####6 | 19.3M/41.5M [04:39<09:38, 40.3kB/s]
+ 46%|####6 | 19.3M/41.5M [04:39<09:18, 41.7kB/s]
+ 46%|####6 | 19.3M/41.5M [04:39<11:18, 34.3kB/s]
+ 46%|####6 | 19.3M/41.5M [04:40<10:29, 37.0kB/s]
+ 46%|####6 | 19.3M/41.5M [04:40<09:52, 39.3kB/s]
+ 47%|####6 | 19.3M/41.5M [04:40<09:26, 41.1kB/s]
+ 47%|####6 | 19.3M/41.5M [04:40<09:07, 42.5kB/s]
+ 47%|####6 | 19.3M/41.5M [04:40<08:46, 44.2kB/s]
+ 47%|####6 | 19.3M/41.5M [04:41<08:59, 43.1kB/s]
+ 47%|####6 | 19.4M/41.5M [04:41<08:24, 46.0kB/s]
+ 47%|####6 | 19.4M/41.5M [04:41<08:46, 44.1kB/s]
+ 47%|####6 | 19.4M/41.5M [04:42<08:40, 44.6kB/s]
+ 47%|####6 | 19.4M/41.5M [04:42<08:35, 45.0kB/s]
+ 47%|####6 | 19.4M/41.5M [04:42<08:31, 45.3kB/s]
+ 47%|####6 | 19.4M/41.5M [04:42<08:27, 45.6kB/s]
+ 47%|####6 | 19.4M/41.5M [04:42<08:25, 45.8kB/s]
+ 47%|####6 | 19.4M/41.5M [04:42<08:23, 46.0kB/s]
+ 47%|####6 | 19.4M/41.5M [04:43<07:49, 49.3kB/s]
+ 47%|####6 | 19.4M/41.5M [04:43<07:58, 48.4kB/s]
+ 47%|####6 | 19.4M/41.5M [04:43<10:30, 36.7kB/s]
+ 47%|####6 | 19.5M/41.5M [04:43<06:32, 58.8kB/s]
+ 47%|####6 | 19.5M/41.5M [04:43<06:54, 55.7kB/s]
+ 47%|####6 | 19.5M/41.5M [04:44<05:52, 65.5kB/s]
+ 47%|####6 | 19.5M/41.5M [04:44<05:16, 73.0kB/s]
+ 47%|####6 | 19.5M/41.5M [04:44<05:52, 65.5kB/s]
+ 47%|####7 | 19.5M/41.5M [04:44<05:14, 73.3kB/s]
+ 47%|####7 | 19.5M/41.5M [04:44<04:51, 79.0kB/s]
+ 47%|####7 | 19.5M/41.5M [04:45<04:37, 83.0kB/s]
+ 47%|####7 | 19.6M/41.5M [04:45<04:27, 85.9kB/s]
+ 47%|####7 | 19.6M/41.5M [04:45<04:21, 87.9kB/s]
+ 47%|####7 | 19.6M/41.5M [04:45<04:16, 89.4kB/s]
+ 47%|####7 | 19.6M/41.5M [04:45<05:29, 69.6kB/s]
+ 47%|####7 | 19.6M/41.5M [04:46<05:04, 75.3kB/s]
+ 47%|####7 | 19.6M/41.5M [04:46<06:01, 63.3kB/s]
+ 47%|####7 | 19.7M/41.5M [04:46<04:59, 76.3kB/s]
+ 47%|####7 | 19.7M/41.5M [04:46<05:35, 68.1kB/s]
+ 47%|####7 | 19.7M/41.5M [04:47<05:27, 69.8kB/s]
+ 47%|####7 | 19.7M/41.5M [04:47<05:34, 68.2kB/s]
+ 48%|####7 | 19.7M/41.5M [04:47<05:25, 70.2kB/s]
+ 48%|####7 | 19.7M/41.5M [04:47<05:33, 68.3kB/s]
+ 48%|####7 | 19.7M/41.5M [04:47<05:03, 75.2kB/s]
+ 48%|####7 | 19.8M/41.5M [04:48<05:03, 75.0kB/s]
+ 48%|####7 | 19.8M/41.5M [04:48<06:04, 62.6kB/s]
+ 48%|####7 | 19.8M/41.5M [04:48<05:48, 65.2kB/s]
+ 48%|####7 | 19.8M/41.5M [04:48<05:03, 74.9kB/s]
+ 48%|####7 | 19.8M/41.5M [04:49<05:56, 63.7kB/s]
+ 48%|####7 | 19.8M/41.5M [04:49<06:18, 59.9kB/s]
+ 48%|####7 | 19.8M/41.5M [04:49<06:40, 56.7kB/s]
+ 48%|####7 | 19.9M/41.5M [04:49<08:46, 43.1kB/s]
+ 48%|####7 | 19.9M/41.5M [04:50<06:23, 59.2kB/s]
+ 48%|####7 | 19.9M/41.5M [04:50<06:45, 55.8kB/s]
+ 48%|####7 | 19.9M/41.5M [04:50<06:18, 59.8kB/s]
+ 48%|####7 | 19.9M/41.5M [04:50<06:41, 56.3kB/s]
+ 48%|####7 | 19.9M/41.5M [04:51<08:56, 42.2kB/s]
+ 48%|####8 | 19.9M/41.5M [04:51<09:37, 39.1kB/s]
+ 48%|####8 | 19.9M/41.5M [04:52<10:33, 35.7kB/s]
+ 48%|####8 | 20.0M/41.5M [04:52<11:06, 33.9kB/s]
+ 48%|####8 | 20.0M/41.5M [04:53<13:41, 27.5kB/s]
+ 48%|####8 | 20.0M/41.5M [04:53<16:01, 23.5kB/s]
+ 48%|####8 | 20.0M/41.5M [04:53<16:03, 23.4kB/s]
+ 48%|####8 | 20.0M/41.5M [04:54<16:05, 23.4kB/s]
+ 48%|####8 | 20.0M/41.5M [04:54<13:57, 26.9kB/s]
+ 48%|####8 | 20.0M/41.5M [04:54<12:20, 30.4kB/s]
+ 48%|####8 | 20.0M/41.5M [04:55<13:25, 28.0kB/s]
+ 48%|####8 | 20.0M/41.5M [04:55<11:53, 31.6kB/s]
+ 48%|####8 | 20.0M/41.5M [04:55<10:46, 34.8kB/s]
+ 48%|####8 | 20.0M/41.5M [04:55<07:42, 48.6kB/s]
+ 48%|####8 | 20.0M/41.5M [04:55<07:48, 48.0kB/s]
+ 48%|####8 | 20.1M/41.5M [04:55<07:52, 47.6kB/s]
+ 48%|####8 | 20.1M/41.5M [04:56<06:14, 60.0kB/s]
+ 48%|####8 | 20.1M/41.5M [04:56<06:39, 56.2kB/s]
+ 48%|####8 | 20.1M/41.5M [04:56<07:00, 53.4kB/s]
+ 48%|####8 | 20.1M/41.5M [04:56<07:26, 50.2kB/s]
+ 49%|####8 | 20.1M/41.5M [04:57<06:27, 57.8kB/s]
+ 49%|####8 | 20.1M/41.5M [04:57<04:56, 75.6kB/s]
+ 49%|####8 | 20.2M/41.5M [04:57<04:40, 79.7kB/s]
+ 49%|####8 | 20.2M/41.5M [04:57<05:38, 66.1kB/s]
+ 49%|####8 | 20.2M/41.5M [04:58<06:02, 61.7kB/s]
+ 49%|####8 | 20.2M/41.5M [04:58<09:31, 39.0kB/s]
+ 49%|####8 | 20.2M/41.5M [04:59<10:24, 35.7kB/s]
+ 49%|####8 | 20.2M/41.5M [04:59<13:10, 28.2kB/s]
+ 49%|####8 | 20.2M/41.5M [04:59<12:00, 30.9kB/s]
+ 49%|####8 | 20.2M/41.5M [05:00<12:58, 28.6kB/s]
+ 49%|####8 | 20.2M/41.5M [05:00<11:41, 31.7kB/s]
+ 49%|####8 | 20.2M/41.5M [05:00<12:51, 28.9kB/s]
+ 49%|####8 | 20.3M/41.5M [05:00<11:30, 32.3kB/s]
+ 49%|####8 | 20.3M/41.5M [05:01<10:30, 35.3kB/s]
+ 49%|####8 | 20.3M/41.5M [05:01<09:46, 37.9kB/s]
+ 49%|####8 | 20.3M/41.5M [05:01<09:15, 40.1kB/s]
+ 49%|####8 | 20.3M/41.5M [05:01<08:52, 41.7kB/s]
+ 49%|####8 | 20.3M/41.5M [05:01<08:36, 43.0kB/s]
+ 49%|####8 | 20.3M/41.5M [05:02<09:16, 39.9kB/s]
+ 49%|####8 | 20.3M/41.5M [05:02<08:02, 46.0kB/s]
+ 49%|####8 | 20.3M/41.5M [05:02<08:01, 46.1kB/s]
+ 49%|####8 | 20.3M/41.5M [05:02<08:00, 46.2kB/s]
+ 49%|####9 | 20.3M/41.5M [05:02<07:59, 46.2kB/s]
+ 49%|####9 | 20.4M/41.5M [05:02<06:08, 60.1kB/s]
+ 49%|####9 | 20.4M/41.5M [05:03<06:35, 56.0kB/s]
+ 49%|####9 | 20.4M/41.5M [05:03<05:30, 67.0kB/s]
+ 49%|####9 | 20.4M/41.5M [05:03<06:03, 60.8kB/s]
+ 49%|####9 | 20.4M/41.5M [05:03<06:31, 56.5kB/s]
+ 49%|####9 | 20.4M/41.5M [05:03<05:28, 67.3kB/s]
+ 49%|####9 | 20.4M/41.5M [05:03<06:01, 61.1kB/s]
+ 49%|####9 | 20.4M/41.5M [05:04<06:30, 56.6kB/s]
+ 49%|####9 | 20.4M/41.5M [05:04<05:27, 67.5kB/s]
+ 49%|####9 | 20.4M/41.5M [05:04<06:00, 61.2kB/s]
+ 49%|####9 | 20.5M/41.5M [05:04<05:12, 70.6kB/s]
+ 49%|####9 | 20.5M/41.5M [05:04<04:45, 77.3kB/s]
+ 49%|####9 | 20.5M/41.5M [05:04<04:28, 81.9kB/s]
+ 49%|####9 | 20.5M/41.5M [05:05<05:08, 71.3kB/s]
+ 49%|####9 | 20.5M/41.5M [05:05<04:42, 77.7kB/s]
+ 49%|####9 | 20.5M/41.5M [05:05<04:27, 82.2kB/s]
+ 50%|####9 | 20.5M/41.5M [05:05<04:17, 85.3kB/s]
+ 50%|####9 | 20.6M/41.5M [05:05<04:10, 87.6kB/s]
+ 50%|####9 | 20.6M/41.5M [05:06<04:06, 89.1kB/s]
+ 50%|####9 | 20.6M/41.5M [05:06<04:02, 90.2kB/s]
+ 50%|####9 | 20.6M/41.5M [05:06<04:00, 91.0kB/s]
+ 50%|####9 | 20.6M/41.5M [05:06<03:58, 91.6kB/s]
+ 50%|####9 | 20.6M/41.5M [05:06<03:57, 92.0kB/s]
+ 50%|####9 | 20.7M/41.5M [05:06<03:25, 106kB/s]
+ 50%|####9 | 20.7M/41.5M [05:07<03:33, 102kB/s]
+ 50%|####9 | 20.7M/41.5M [05:07<03:12, 113kB/s]
+ 50%|####9 | 20.7M/41.5M [05:07<03:00, 121kB/s]
+ 50%|##### | 20.8M/41.5M [05:07<02:52, 126kB/s]
+ 50%|##### | 20.8M/41.5M [05:07<04:02, 89.5kB/s]
+ 50%|##### | 20.8M/41.5M [05:08<02:53, 125kB/s]
+ 50%|##### | 20.8M/41.5M [05:08<02:48, 128kB/s]
+ 50%|##### | 20.8M/41.5M [05:08<03:14, 111kB/s]
+ 50%|##### | 20.9M/41.5M [05:08<03:09, 114kB/s]
+ 50%|##### | 20.9M/41.5M [05:08<03:35, 100kB/s]
+ 50%|##### | 20.9M/41.5M [05:09<03:39, 98.3kB/s]
+ 50%|##### | 20.9M/41.5M [05:09<04:30, 79.7kB/s]
+ 50%|##### | 20.9M/41.5M [05:09<04:34, 78.5kB/s]
+ 50%|##### | 20.9M/41.5M [05:09<04:23, 81.7kB/s]
+ 51%|##### | 21.0M/41.5M [05:09<04:14, 84.6kB/s]
+ 51%|##### | 21.0M/41.5M [05:10<04:07, 86.9kB/s]
+ 51%|##### | 21.0M/41.5M [05:10<04:02, 88.5kB/s]
+ 51%|##### | 21.0M/41.5M [05:10<05:24, 66.1kB/s]
+ 51%|##### | 21.0M/41.5M [05:10<04:18, 83.0kB/s]
+ 51%|##### | 21.0M/41.5M [05:11<04:42, 75.8kB/s]
+ 51%|##### | 21.1M/41.5M [05:11<04:43, 75.5kB/s]
+ 51%|##### | 21.1M/41.5M [05:11<05:13, 68.4kB/s]
+ 51%|##### | 21.1M/41.5M [05:11<04:46, 74.7kB/s]
+ 51%|##### | 21.1M/41.5M [05:11<05:19, 67.1kB/s]
+ 51%|##### | 21.1M/41.5M [05:12<05:08, 69.3kB/s]
+ 51%|##### | 21.1M/41.5M [05:12<04:42, 75.7kB/s]
+ 51%|##### | 21.1M/41.5M [05:12<04:54, 72.6kB/s]
+ 51%|##### | 21.1M/41.5M [05:12<04:51, 73.1kB/s]
+ 51%|##### | 21.1M/41.5M [05:12<05:02, 70.6kB/s]
+ 51%|#####1 | 21.2M/41.5M [05:12<04:10, 84.9kB/s]
+ 51%|#####1 | 21.2M/41.5M [05:13<04:04, 87.2kB/s]
+ 51%|#####1 | 21.2M/41.5M [05:13<03:57, 89.4kB/s]
+ 51%|#####1 | 21.2M/41.5M [05:13<03:55, 90.4kB/s]
+ 51%|#####1 | 21.2M/41.5M [05:13<03:37, 97.8kB/s]
+ 51%|#####1 | 21.3M/41.5M [05:14<04:25, 79.9kB/s]
+ 51%|#####1 | 21.3M/41.5M [05:14<03:21, 105kB/s]
+ 51%|#####1 | 21.3M/41.5M [05:14<03:27, 102kB/s]
+ 51%|#####1 | 21.3M/41.5M [05:14<03:31, 100kB/s]
+ 51%|#####1 | 21.3M/41.5M [05:14<03:35, 98.1kB/s]
+ 51%|#####1 | 21.4M/41.5M [05:14<03:24, 103kB/s]
+ 52%|#####1 | 21.4M/41.5M [05:15<03:30, 100kB/s]
+ 52%|#####1 | 21.4M/41.5M [05:15<03:08, 112kB/s]
+ 52%|#####1 | 21.4M/41.5M [05:15<03:18, 106kB/s]
+ 52%|#####1 | 21.4M/41.5M [05:15<03:01, 116kB/s]
+ 52%|#####1 | 21.4M/41.5M [05:15<03:12, 109kB/s]
+ 52%|#####1 | 21.5M/41.5M [05:15<03:21, 104kB/s]
+ 52%|#####1 | 21.5M/41.5M [05:16<03:02, 115kB/s]
+ 52%|#####1 | 21.5M/41.5M [05:16<02:51, 122kB/s]
+ 52%|#####1 | 21.5M/41.5M [05:16<03:31, 98.8kB/s]
+ 52%|#####1 | 21.5M/41.5M [05:16<02:57, 118kB/s]
+ 52%|#####1 | 21.6M/41.5M [05:17<04:05, 85.3kB/s]
+ 52%|#####2 | 21.6M/41.5M [05:17<03:46, 92.3kB/s]
+ 52%|#####2 | 21.6M/41.5M [05:17<05:00, 69.3kB/s]
+ 52%|#####2 | 21.6M/41.5M [05:17<04:39, 74.6kB/s]
+ 52%|#####2 | 21.6M/41.5M [05:17<04:42, 73.8kB/s]
+ 52%|#####2 | 21.6M/41.5M [05:18<05:30, 63.0kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:18<04:58, 69.6kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:18<05:24, 64.0kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:19<07:18, 47.4kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:19<05:05, 68.0kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:19<06:51, 50.5kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:19<06:58, 49.6kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:20<08:45, 39.5kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:20<06:46, 50.9kB/s]
+ 52%|#####2 | 21.7M/41.5M [05:20<06:55, 49.9kB/s]
+ 52%|#####2 | 21.8M/41.5M [05:20<07:02, 49.0kB/s]
+ 52%|#####2 | 21.8M/41.5M [05:20<05:41, 60.5kB/s]
+ 52%|#####2 | 21.8M/41.5M [05:20<06:04, 56.7kB/s]
+ 53%|#####2 | 21.8M/41.5M [05:21<05:29, 62.7kB/s]
+ 53%|#####2 | 21.8M/41.5M [05:21<05:31, 62.2kB/s]
+ 53%|#####2 | 21.8M/41.5M [05:21<05:58, 57.6kB/s]
+ 53%|#####2 | 21.8M/41.5M [05:21<05:24, 63.6kB/s]
+ 53%|#####2 | 21.8M/41.5M [05:21<05:27, 62.9kB/s]
+ 53%|#####2 | 21.8M/41.5M [05:22<04:46, 71.9kB/s]
+ 53%|#####2 | 21.9M/41.5M [05:22<05:20, 64.2kB/s]
+ 53%|#####2 | 21.9M/41.5M [05:22<04:42, 72.8kB/s]
+ 53%|#####2 | 21.9M/41.5M [05:22<04:20, 78.8kB/s]
+ 53%|#####2 | 21.9M/41.5M [05:22<04:07, 83.0kB/s]
+ 53%|#####2 | 21.9M/41.5M [05:22<03:58, 85.9kB/s]
+ 53%|#####2 | 21.9M/41.5M [05:23<03:53, 88.0kB/s]
+ 53%|#####2 | 22.0M/41.5M [05:23<03:18, 103kB/s]
+ 53%|#####2 | 22.0M/41.5M [05:23<03:24, 100kB/s]
+ 53%|#####3 | 22.0M/41.5M [05:23<03:02, 112kB/s]
+ 53%|#####3 | 22.0M/41.5M [05:23<03:12, 106kB/s]
+ 53%|#####3 | 22.0M/41.5M [05:24<04:19, 78.6kB/s]
+ 53%|#####3 | 22.1M/41.5M [05:24<02:54, 117kB/s]
+ 53%|#####3 | 22.1M/41.5M [05:24<03:04, 110kB/s]
+ 53%|#####3 | 22.1M/41.5M [05:24<03:12, 106kB/s]
+ 53%|#####3 | 22.1M/41.5M [05:24<02:56, 115kB/s]
+ 53%|#####3 | 22.1M/41.5M [05:25<03:06, 109kB/s]
+ 53%|#####3 | 22.2M/41.5M [05:25<03:42, 91.1kB/s]
+ 53%|#####3 | 22.2M/41.5M [05:25<02:57, 114kB/s]
+ 54%|#####3 | 22.2M/41.5M [05:25<03:06, 109kB/s]
+ 54%|#####3 | 22.2M/41.5M [05:25<03:04, 110kB/s]
+ 54%|#####3 | 22.2M/41.5M [05:26<03:49, 88.1kB/s]
+ 54%|#####3 | 22.3M/41.5M [05:26<02:53, 116kB/s]
+ 54%|#####3 | 22.3M/41.5M [05:26<03:03, 110kB/s]
+ 54%|#####3 | 22.3M/41.5M [05:26<02:53, 116kB/s]
+ 54%|#####3 | 22.3M/41.5M [05:26<03:08, 106kB/s]
+ 54%|#####3 | 22.3M/41.5M [05:27<03:05, 108kB/s]
+ 54%|#####3 | 22.4M/41.5M [05:27<02:51, 117kB/s]
+ 54%|#####3 | 22.4M/41.5M [05:27<02:44, 122kB/s]
+ 54%|#####4 | 22.4M/41.5M [05:27<02:53, 115kB/s]
+ 54%|#####4 | 22.4M/41.5M [05:27<02:43, 122kB/s]
+ 54%|#####4 | 22.5M/41.5M [05:28<02:37, 127kB/s]
+ 54%|#####4 | 22.5M/41.5M [05:28<02:50, 117kB/s]
+ 54%|#####4 | 22.5M/41.5M [05:28<02:41, 124kB/s]
+ 54%|#####4 | 22.5M/41.5M [05:28<03:41, 89.8kB/s]
+ 54%|#####4 | 22.5M/41.5M [05:28<02:48, 118kB/s]
+ 54%|#####4 | 22.6M/41.5M [05:29<02:58, 111kB/s]
+ 54%|#####4 | 22.6M/41.5M [05:29<03:06, 106kB/s]
+ 54%|#####4 | 22.6M/41.5M [05:29<03:12, 103kB/s]
+ 54%|#####4 | 22.6M/41.5M [05:29<03:18, 99.9kB/s]
+ 55%|#####4 | 22.6M/41.5M [05:29<04:20, 76.0kB/s]
+ 55%|#####4 | 22.6M/41.5M [05:30<04:06, 80.1kB/s]
+ 55%|#####4 | 22.7M/41.5M [05:30<04:14, 77.5kB/s]
+ 55%|#####4 | 22.7M/41.5M [05:30<04:45, 69.0kB/s]
+ 55%|#####4 | 22.7M/41.5M [05:30<04:24, 74.6kB/s]
+ 55%|#####4 | 22.7M/41.5M [05:31<05:11, 63.3kB/s]
+ 55%|#####4 | 22.7M/41.5M [05:31<04:34, 71.7kB/s]
+ 55%|#####4 | 22.7M/41.5M [05:31<04:44, 69.2kB/s]
+ 55%|#####4 | 22.8M/41.5M [05:31<04:23, 74.4kB/s]
+ 55%|#####4 | 22.8M/41.5M [05:32<04:08, 78.9kB/s]
+ 55%|#####4 | 22.8M/41.5M [05:32<03:57, 82.4kB/s]
+ 55%|#####4 | 22.8M/41.5M [05:32<03:50, 85.1kB/s]
+ 55%|#####5 | 22.8M/41.5M [05:32<04:17, 76.1kB/s]
+ 55%|#####5 | 22.8M/41.5M [05:32<04:03, 80.4kB/s]
+ 55%|#####5 | 22.9M/41.5M [05:33<03:53, 83.7kB/s]
+ 55%|#####5 | 22.9M/41.5M [05:33<03:46, 86.2kB/s]
+ 55%|#####5 | 22.9M/41.5M [05:33<04:44, 68.5kB/s]
+ 55%|#####5 | 22.9M/41.5M [05:33<03:48, 85.4kB/s]
+ 55%|#####5 | 22.9M/41.5M [05:33<03:43, 87.3kB/s]
+ 55%|#####5 | 22.9M/41.5M [05:34<03:39, 88.8kB/s]
+ 55%|#####5 | 23.0M/41.5M [05:34<03:36, 89.9kB/s]
+ 55%|#####5 | 23.0M/41.5M [05:34<03:34, 90.7kB/s]
+ 55%|#####5 | 23.0M/41.5M [05:34<03:32, 91.3kB/s]
+ 55%|#####5 | 23.0M/41.5M [05:34<03:28, 92.7kB/s]
+ 55%|#####5 | 23.0M/41.5M [05:35<03:28, 92.8kB/s]
+ 56%|#####5 | 23.0M/41.5M [05:35<03:28, 92.7kB/s]
+ 56%|#####5 | 23.1M/41.5M [05:35<03:02, 106kB/s]
+ 56%|#####5 | 23.1M/41.5M [05:35<03:09, 102kB/s]
+ 56%|#####5 | 23.1M/41.5M [05:35<03:14, 99.4kB/s]
+ 56%|#####5 | 23.1M/41.5M [05:36<03:44, 85.8kB/s]
+ 56%|#####5 | 23.1M/41.5M [05:36<02:54, 110kB/s]
+ 56%|#####5 | 23.2M/41.5M [05:36<03:01, 106kB/s]
+ 56%|#####5 | 23.2M/41.5M [05:36<03:08, 102kB/s]
+ 56%|#####5 | 23.2M/41.5M [05:36<03:12, 99.5kB/s]
+ 56%|#####5 | 23.2M/41.5M [05:37<03:16, 97.5kB/s]
+ 56%|#####5 | 23.2M/41.5M [05:37<03:19, 96.0kB/s]
+ 56%|#####6 | 23.2M/41.5M [05:37<03:21, 95.0kB/s]
+ 56%|#####6 | 23.3M/41.5M [05:37<03:37, 88.1kB/s]
+ 56%|#####6 | 23.3M/41.5M [05:37<03:33, 89.4kB/s]
+ 56%|#####6 | 23.3M/41.5M [05:37<03:31, 90.3kB/s]
+ 56%|#####6 | 23.3M/41.5M [05:38<03:29, 91.0kB/s]
+ 56%|#####6 | 23.3M/41.5M [05:38<03:28, 91.6kB/s]
+ 56%|#####6 | 23.3M/41.5M [05:38<03:27, 91.9kB/s]
+ 56%|#####6 | 23.4M/41.5M [05:38<03:11, 99.4kB/s]
+ 56%|#####6 | 23.4M/41.5M [05:38<03:15, 97.3kB/s]
+ 56%|#####6 | 23.4M/41.5M [05:38<02:52, 110kB/s]
+ 56%|#####6 | 23.4M/41.5M [05:39<02:39, 119kB/s]
+ 56%|#####6 | 23.4M/41.5M [05:39<02:50, 111kB/s]
+ 57%|#####6 | 23.5M/41.5M [05:39<02:38, 120kB/s]
+ 57%|#####6 | 23.5M/41.5M [05:39<02:30, 126kB/s]
+ 57%|#####6 | 23.5M/41.5M [05:39<02:25, 130kB/s]
+ 57%|#####6 | 23.5M/41.5M [05:40<02:22, 132kB/s]
+ 57%|#####6 | 23.5M/41.5M [05:40<03:02, 103kB/s]
+ 57%|#####6 | 23.6M/41.5M [05:40<02:18, 135kB/s]
+ 57%|#####6 | 23.6M/41.5M [05:40<02:25, 129kB/s]
+ 57%|#####6 | 23.6M/41.5M [05:40<02:23, 130kB/s]
+ 57%|#####6 | 23.6M/41.5M [05:40<02:23, 131kB/s]
+ 57%|#####6 | 23.6M/41.5M [05:41<02:25, 129kB/s]
+ 57%|#####7 | 23.7M/41.5M [05:41<02:21, 132kB/s]
+ 57%|#####7 | 23.7M/41.5M [05:41<02:12, 141kB/s]
+ 57%|#####7 | 23.7M/41.5M [05:41<02:13, 140kB/s]
+ 57%|#####7 | 23.7M/41.5M [05:41<02:13, 139kB/s]
+ 57%|#####7 | 23.8M/41.5M [05:41<02:13, 139kB/s]
+ 57%|#####7 | 23.8M/41.5M [05:42<03:02, 102kB/s]
+ 57%|#####7 | 23.8M/41.5M [05:42<02:01, 153kB/s]
+ 57%|#####7 | 23.8M/41.5M [05:42<02:09, 143kB/s]
+ 58%|#####7 | 23.9M/41.5M [05:42<02:29, 123kB/s]
+ 58%|#####7 | 23.9M/41.5M [05:43<02:34, 119kB/s]
+ 58%|#####7 | 23.9M/41.5M [05:43<02:28, 124kB/s]
+ 58%|#####7 | 23.9M/41.5M [05:43<03:02, 101kB/s]
+ 58%|#####7 | 24.0M/41.5M [05:43<02:11, 140kB/s]
+ 58%|#####7 | 24.0M/41.5M [05:43<02:20, 131kB/s]
+ 58%|#####7 | 24.0M/41.5M [05:44<02:17, 133kB/s]
+ 58%|#####7 | 24.0M/41.5M [05:44<02:19, 131kB/s]
+ 58%|#####7 | 24.1M/41.5M [05:44<02:05, 145kB/s]
+ 58%|#####8 | 24.1M/41.5M [05:44<02:11, 139kB/s]
+ 58%|#####8 | 24.1M/41.5M [05:44<02:15, 134kB/s]
+ 58%|#####8 | 24.1M/41.5M [05:44<02:14, 136kB/s]
+ 58%|#####8 | 24.1M/41.5M [05:44<02:01, 150kB/s]
+ 58%|#####8 | 24.2M/41.5M [05:45<01:53, 161kB/s]
+ 58%|#####8 | 24.2M/41.5M [05:45<01:58, 153kB/s]
+ 58%|#####8 | 24.2M/41.5M [05:45<02:05, 144kB/s]
+ 58%|#####8 | 24.2M/41.5M [05:45<02:11, 138kB/s]
+ 58%|#####8 | 24.2M/41.5M [05:45<02:10, 138kB/s]
+ 58%|#####8 | 24.3M/41.5M [05:45<01:59, 152kB/s]
+ 59%|#####8 | 24.3M/41.5M [05:45<01:51, 162kB/s]
+ 59%|#####8 | 24.3M/41.5M [05:46<01:56, 154kB/s]
+ 59%|#####8 | 24.3M/41.5M [05:46<01:51, 162kB/s]
+ 59%|#####8 | 24.4M/41.5M [05:46<01:56, 154kB/s]
+ 59%|#####8 | 24.4M/41.5M [05:46<01:51, 162kB/s]
+ 59%|#####8 | 24.4M/41.5M [05:46<01:46, 169kB/s]
+ 59%|#####8 | 24.4M/41.5M [05:46<01:42, 175kB/s]
+ 59%|#####8 | 24.4M/41.5M [05:46<01:41, 177kB/s]
+ 59%|#####8 | 24.5M/41.5M [05:47<01:43, 172kB/s]
+ 59%|#####9 | 24.5M/41.5M [05:47<01:47, 166kB/s]
+ 59%|#####9 | 24.5M/41.5M [05:47<02:17, 129kB/s]
+ 59%|#####9 | 24.6M/41.5M [05:47<01:38, 181kB/s]
+ 59%|#####9 | 24.6M/41.5M [05:47<01:44, 169kB/s]
+ 59%|#####9 | 24.6M/41.5M [05:48<01:49, 162kB/s]
+ 59%|#####9 | 24.7M/41.5M [05:48<01:44, 169kB/s]
+ 59%|#####9 | 24.7M/41.5M [05:48<01:41, 174kB/s]
+ 60%|#####9 | 24.7M/41.5M [05:48<02:21, 124kB/s]
+ 60%|#####9 | 24.7M/41.5M [05:48<01:55, 152kB/s]
+ 60%|#####9 | 24.8M/41.5M [05:49<01:42, 171kB/s]
+ 60%|#####9 | 24.8M/41.5M [05:49<01:48, 162kB/s]
+ 60%|#####9 | 24.8M/41.5M [05:49<01:52, 155kB/s]
+ 60%|#####9 | 24.8M/41.5M [05:49<02:14, 130kB/s]
+ 60%|#####9 | 24.9M/41.5M [05:49<01:57, 148kB/s]
+ 60%|#####9 | 24.9M/41.5M [05:50<02:39, 109kB/s]
+ 60%|###### | 24.9M/41.5M [05:50<02:23, 121kB/s]
+ 60%|###### | 24.9M/41.5M [05:50<02:28, 117kB/s]
+ 60%|###### | 25.0M/41.5M [05:50<02:37, 110kB/s]
+ 60%|###### | 25.0M/41.5M [05:50<02:44, 106kB/s]
+ 60%|###### | 25.0M/41.5M [05:51<02:49, 102kB/s]
+ 60%|###### | 25.0M/41.5M [05:51<02:37, 109kB/s]
+ 60%|###### | 25.0M/41.5M [05:51<02:40, 108kB/s]
+ 60%|###### | 25.0M/41.5M [05:51<02:31, 114kB/s]
+ 60%|###### | 25.1M/41.5M [05:51<02:35, 111kB/s]
+ 60%|###### | 25.1M/41.5M [05:52<02:28, 116kB/s]
+ 61%|###### | 25.1M/41.5M [05:52<02:20, 123kB/s]
+ 61%|###### | 25.1M/41.5M [05:52<02:26, 117kB/s]
+ 61%|###### | 25.1M/41.5M [05:52<02:18, 124kB/s]
+ 61%|###### | 25.2M/41.5M [05:52<02:13, 129kB/s]
+ 61%|###### | 25.2M/41.5M [05:52<02:09, 132kB/s]
+ 61%|###### | 25.2M/41.5M [05:53<02:00, 141kB/s]
+ 61%|###### | 25.2M/41.5M [05:53<02:01, 141kB/s]
+ 61%|###### | 25.3M/41.5M [05:53<01:59, 142kB/s]
+ 61%|###### | 25.3M/41.5M [05:53<02:00, 141kB/s]
+ 61%|######1 | 25.3M/41.5M [05:53<01:49, 155kB/s]
+ 61%|######1 | 25.4M/41.5M [05:53<01:43, 164kB/s]
+ 61%|######1 | 25.4M/41.5M [05:54<01:47, 156kB/s]
+ 61%|######1 | 25.4M/41.5M [05:54<01:42, 165kB/s]
+ 61%|######1 | 25.4M/41.5M [05:54<01:38, 171kB/s]
+ 61%|######1 | 25.5M/41.5M [05:54<01:35, 176kB/s]
+ 61%|######1 | 25.5M/41.5M [05:54<01:33, 179kB/s]
+ 62%|######1 | 25.5M/41.5M [05:55<01:32, 181kB/s]
+ 62%|######1 | 25.6M/41.5M [05:55<01:25, 196kB/s]
+ 62%|######1 | 25.6M/41.5M [05:55<01:20, 207kB/s]
+ 62%|######1 | 25.7M/41.5M [05:55<01:12, 228kB/s]
+ 62%|######1 | 25.7M/41.5M [05:55<01:08, 243kB/s]
+ 62%|######2 | 25.8M/41.5M [05:55<01:01, 268kB/s]
+ 62%|######2 | 25.8M/41.5M [05:56<00:57, 285kB/s]
+ 62%|######2 | 25.9M/41.5M [05:56<00:52, 311kB/s]
+ 63%|######2 | 26.0M/41.5M [05:56<00:45, 356kB/s]
+ 63%|######2 | 26.0M/41.5M [05:56<00:53, 304kB/s]
+ 63%|######2 | 26.1M/41.5M [05:56<00:39, 411kB/s]
+ 63%|######3 | 26.2M/41.5M [05:56<00:40, 399kB/s]
+ 63%|######3 | 26.2M/41.5M [05:57<00:42, 376kB/s]
+ 63%|######3 | 26.3M/41.5M [05:57<00:53, 298kB/s]
+ 64%|######3 | 26.4M/41.5M [05:57<00:41, 386kB/s]
+ 64%|######3 | 26.4M/41.5M [05:57<00:44, 358kB/s]
+ 64%|######3 | 26.5M/41.5M [05:58<00:57, 274kB/s]
+ 64%|######4 | 26.6M/41.5M [05:58<00:47, 329kB/s]
+ 64%|######4 | 26.6M/41.5M [05:58<00:51, 305kB/s]
+ 64%|######4 | 26.7M/41.5M [05:58<00:52, 298kB/s]
+ 64%|######4 | 26.7M/41.5M [05:58<00:55, 280kB/s]
+ 64%|######4 | 26.8M/41.5M [05:59<00:58, 266kB/s]
+ 65%|######4 | 26.8M/41.5M [05:59<00:57, 270kB/s]
+ 65%|######4 | 26.8M/41.5M [05:59<00:56, 272kB/s]
+ 65%|######4 | 26.9M/41.5M [05:59<00:58, 260kB/s]
+ 65%|######4 | 26.9M/41.5M [05:59<00:57, 266kB/s]
+ 65%|######5 | 27.0M/41.5M [05:59<00:56, 269kB/s]
+ 65%|######5 | 27.0M/41.5M [06:00<00:55, 272kB/s]
+ 65%|######5 | 27.1M/41.5M [06:00<00:58, 260kB/s]
+ 65%|######5 | 27.1M/41.5M [06:00<00:56, 266kB/s]
+ 65%|######5 | 27.2M/41.5M [06:00<00:55, 269kB/s]
+ 66%|######5 | 27.2M/41.5M [06:00<00:55, 272kB/s]
+ 66%|######5 | 27.2M/41.5M [06:01<00:54, 274kB/s]
+ 66%|######5 | 27.3M/41.5M [06:01<00:51, 289kB/s]
+ 66%|######5 | 27.4M/41.5M [06:01<00:51, 286kB/s]
+ 66%|######6 | 27.4M/41.5M [06:01<00:52, 284kB/s]
+ 66%|######6 | 27.5M/41.5M [06:01<00:49, 296kB/s]
+ 66%|######6 | 27.5M/41.5M [06:01<00:50, 291kB/s]
+ 66%|######6 | 27.6M/41.5M [06:02<00:48, 301kB/s]
+ 67%|######6 | 27.6M/41.5M [06:02<00:47, 308kB/s]
+ 67%|######6 | 27.7M/41.5M [06:02<00:48, 299kB/s]
+ 67%|######6 | 27.7M/41.5M [06:02<00:47, 307kB/s]
+ 67%|######6 | 27.8M/41.5M [06:02<00:46, 312kB/s]
+ 67%|######7 | 27.8M/41.5M [06:02<00:43, 330kB/s]
+ 67%|######7 | 27.9M/41.5M [06:03<00:43, 328kB/s]
+ 67%|######7 | 27.9M/41.5M [06:03<00:43, 327kB/s]
+ 67%|######7 | 28.0M/41.5M [06:03<00:41, 340kB/s]
+ 68%|######7 | 28.1M/41.5M [06:03<00:41, 336kB/s]
+ 68%|######7 | 28.1M/41.5M [06:03<00:40, 346kB/s]
+ 68%|######7 | 28.2M/41.5M [06:04<00:39, 354kB/s]
+ 68%|######8 | 28.2M/41.5M [06:04<00:38, 359kB/s]
+ 68%|######8 | 28.3M/41.5M [06:04<00:35, 390kB/s]
+ 68%|######8 | 28.4M/41.5M [06:04<00:34, 399kB/s]
+ 69%|######8 | 28.5M/41.5M [06:04<00:32, 418kB/s]
+ 69%|######8 | 28.6M/41.5M [06:04<00:30, 446kB/s]
+ 69%|######9 | 28.6M/41.5M [06:05<00:28, 465kB/s]
+ 69%|######9 | 28.7M/41.5M [06:05<00:27, 492kB/s]
+ 70%|######9 | 28.8M/41.5M [06:05<00:25, 526kB/s]
+ 70%|######9 | 28.9M/41.5M [06:05<00:23, 563kB/s]
+ 70%|####### | 29.1M/41.5M [06:05<00:21, 602kB/s]
+ 70%|####### | 29.2M/41.5M [06:05<00:20, 631kB/s]
+ 71%|####### | 29.3M/41.5M [06:06<00:18, 678kB/s]
+ 71%|####### | 29.4M/41.5M [06:06<00:16, 756kB/s]
+ 71%|#######1 | 29.6M/41.5M [06:06<00:15, 829kB/s]
+ 72%|#######1 | 29.7M/41.5M [06:06<00:14, 854kB/s]
+ 72%|#######1 | 29.8M/41.5M [06:06<00:14, 824kB/s]
+ 72%|#######2 | 29.9M/41.5M [06:06<00:14, 856kB/s]
+ 72%|#######2 | 30.1M/41.5M [06:06<00:12, 962kB/s]
+ 73%|#######2 | 30.2M/41.5M [06:07<00:16, 706kB/s]
+ 73%|#######3 | 30.4M/41.5M [06:07<00:11, 1.00MB/s]
+ 74%|#######3 | 30.5M/41.5M [06:07<00:11, 972kB/s]
+ 74%|#######3 | 30.7M/41.5M [06:07<00:11, 1.00MB/s]
+ 74%|#######4 | 30.8M/41.5M [06:07<00:16, 703kB/s]
+ 75%|#######4 | 30.9M/41.5M [06:08<00:13, 819kB/s]
+ 75%|#######4 | 31.1M/41.5M [06:08<00:14, 770kB/s]
+ 75%|#######5 | 31.2M/41.5M [06:08<00:14, 735kB/s]
+ 75%|#######5 | 31.3M/41.5M [06:08<00:14, 756kB/s]
+ 76%|#######5 | 31.4M/41.5M [06:08<00:13, 783kB/s]
+ 76%|#######5 | 31.5M/41.5M [06:08<00:17, 591kB/s]
+ 76%|#######6 | 31.6M/41.5M [06:09<00:14, 715kB/s]
+ 76%|#######6 | 31.7M/41.5M [06:09<00:14, 693kB/s]
+ 77%|#######6 | 31.8M/41.5M [06:09<00:16, 635kB/s]
+ 77%|#######6 | 31.9M/41.5M [06:09<00:16, 610kB/s]
+ 77%|#######7 | 32.0M/41.5M [06:09<00:16, 594kB/s]
+ 77%|#######7 | 32.1M/41.5M [06:09<00:15, 627kB/s]
+ 77%|#######7 | 32.1M/41.5M [06:10<00:16, 605kB/s]
+ 78%|#######7 | 32.2M/41.5M [06:10<00:18, 527kB/s]
+ 78%|#######7 | 32.3M/41.5M [06:10<00:17, 557kB/s]
+ 78%|#######7 | 32.4M/41.5M [06:10<00:16, 592kB/s]
+ 78%|#######8 | 32.4M/41.5M [06:10<00:15, 630kB/s]
+ 78%|#######8 | 32.5M/41.5M [06:10<00:15, 606kB/s]
+ 79%|#######8 | 32.6M/41.5M [06:10<00:16, 562kB/s]
+ 79%|#######8 | 32.7M/41.5M [06:11<00:15, 598kB/s]
+ 79%|#######8 | 32.8M/41.5M [06:11<00:14, 637kB/s]
+ 79%|#######9 | 32.8M/41.5M [06:11<00:14, 609kB/s]
+ 79%|#######9 | 32.9M/41.5M [06:11<00:16, 563kB/s]
+ 79%|#######9 | 33.0M/41.5M [06:11<00:14, 600kB/s]
+ 80%|#######9 | 33.1M/41.5M [06:11<00:13, 639kB/s]
+ 80%|#######9 | 33.1M/41.5M [06:11<00:14, 609kB/s]
+ 80%|#######9 | 33.2M/41.5M [06:11<00:15, 563kB/s]
+ 80%|######## | 33.3M/41.5M [06:12<00:14, 600kB/s]
+ 80%|######## | 33.4M/41.5M [06:12<00:13, 640kB/s]
+ 81%|######## | 33.4M/41.5M [06:12<00:13, 609kB/s]
+ 81%|######## | 33.5M/41.5M [06:12<00:14, 563kB/s]
+ 81%|######## | 33.6M/41.5M [06:12<00:13, 618kB/s]
+ 81%|########1 | 33.7M/41.5M [06:12<00:12, 652kB/s]
+ 81%|########1 | 33.7M/41.5M [06:12<00:13, 619kB/s]
+ 81%|########1 | 33.8M/41.5M [06:13<00:14, 569kB/s]
+ 82%|########1 | 33.9M/41.5M [06:13<00:13, 605kB/s]
+ 82%|########1 | 34.0M/41.5M [06:13<00:12, 642kB/s]
+ 82%|########2 | 34.0M/41.5M [06:13<00:12, 612kB/s]
+ 82%|########2 | 34.1M/41.5M [06:13<00:13, 565kB/s]
+ 82%|########2 | 34.2M/41.5M [06:13<00:12, 601kB/s]
+ 83%|########2 | 34.3M/41.5M [06:13<00:11, 641kB/s]
+ 83%|########2 | 34.3M/41.5M [06:13<00:12, 610kB/s]
+ 83%|########2 | 34.4M/41.5M [06:14<00:12, 582kB/s]
+ 83%|########3 | 34.5M/41.5M [06:14<00:11, 628kB/s]
+ 83%|########3 | 34.6M/41.5M [06:14<00:10, 663kB/s]
+ 84%|########3 | 34.7M/41.5M [06:14<00:11, 645kB/s]
+ 84%|########3 | 34.7M/41.5M [06:14<00:12, 590kB/s]
+ 84%|########3 | 34.8M/41.5M [06:14<00:11, 635kB/s]
+ 84%|########4 | 34.9M/41.5M [06:14<00:10, 640kB/s]
+ 84%|########4 | 35.0M/41.5M [06:15<00:09, 701kB/s]
+ 85%|########4 | 35.1M/41.5M [06:15<00:09, 673kB/s]
+ 85%|########4 | 35.2M/41.5M [06:15<00:10, 629kB/s]
+ 85%|########5 | 35.3M/41.5M [06:15<00:09, 680kB/s]
+ 85%|########5 | 35.4M/41.5M [06:15<00:12, 502kB/s]
+ 86%|########5 | 35.5M/41.5M [06:15<00:09, 688kB/s]
+ 86%|########5 | 35.6M/41.5M [06:15<00:09, 672kB/s]
+ 86%|########6 | 35.7M/41.5M [06:16<00:09, 672kB/s]
+ 86%|########6 | 35.8M/41.5M [06:16<00:09, 652kB/s]
+ 86%|########6 | 35.8M/41.5M [06:16<00:10, 570kB/s]
+ 87%|########6 | 35.9M/41.5M [06:16<00:10, 577kB/s]
+ 87%|########6 | 36.0M/41.5M [06:16<00:09, 637kB/s]
+ 87%|########7 | 36.1M/41.5M [06:16<00:08, 665kB/s]
+ 87%|########7 | 36.2M/41.5M [06:16<00:08, 646kB/s]
+ 87%|########7 | 36.2M/41.5M [06:17<00:09, 593kB/s]
+ 88%|########7 | 36.3M/41.5M [06:17<00:08, 615kB/s]
+ 88%|########7 | 36.5M/41.5M [06:17<00:08, 642kB/s]
+ 88%|########8 | 36.6M/41.5M [06:17<00:07, 660kB/s]
+ 88%|########8 | 36.7M/41.5M [06:17<00:07, 671kB/s]
+ 89%|########8 | 36.8M/41.5M [06:17<00:06, 733kB/s]
+ 89%|########8 | 36.9M/41.5M [06:18<00:08, 560kB/s]
+ 89%|########9 | 37.0M/41.5M [06:18<00:06, 699kB/s]
+ 89%|########9 | 37.1M/41.5M [06:18<00:07, 641kB/s]
+ 90%|########9 | 37.2M/41.5M [06:18<00:07, 613kB/s]
+ 90%|########9 | 37.3M/41.5M [06:18<00:07, 584kB/s]
+ 90%|######### | 37.4M/41.5M [06:19<00:07, 576kB/s]
+ 90%|######### | 37.5M/41.5M [06:19<00:07, 584kB/s]
+ 91%|######### | 37.6M/41.5M [06:19<00:07, 576kB/s]
+ 91%|######### | 37.7M/41.5M [06:19<00:06, 584kB/s]
+ 91%|#########1| 37.8M/41.5M [06:19<00:06, 590kB/s]
+ 91%|#########1| 37.9M/41.5M [06:19<00:06, 594kB/s]
+ 92%|#########1| 38.0M/41.5M [06:20<00:06, 596kB/s]
+ 92%|#########1| 38.1M/41.5M [06:20<00:07, 494kB/s]
+ 92%|#########2| 38.2M/41.5M [06:20<00:05, 616kB/s]
+ 92%|#########2| 38.3M/41.5M [06:20<00:06, 558kB/s]
+ 92%|#########2| 38.3M/41.5M [06:20<00:06, 530kB/s]
+ 93%|#########2| 38.4M/41.5M [06:20<00:06, 521kB/s]
+ 93%|#########2| 38.5M/41.5M [06:21<00:05, 520kB/s]
+ 93%|#########3| 38.6M/41.5M [06:21<00:05, 517kB/s]
+ 93%|#########3| 38.7M/41.5M [06:21<00:05, 529kB/s]
+ 93%|#########3| 38.8M/41.5M [06:21<00:05, 537kB/s]
+ 94%|#########3| 38.9M/41.5M [06:21<00:05, 543kB/s]
+ 94%|#########3| 39.0M/41.5M [06:22<00:04, 547kB/s]
+ 94%|#########4| 39.1M/41.5M [06:22<00:04, 550kB/s]
+ 94%|#########4| 39.2M/41.5M [06:22<00:04, 566kB/s]
+ 95%|#########4| 39.3M/41.5M [06:22<00:04, 577kB/s]
+ 95%|#########4| 39.4M/41.5M [06:22<00:03, 585kB/s]
+ 95%|#########5| 39.5M/41.5M [06:22<00:03, 590kB/s]
+ 95%|#########5| 39.6M/41.5M [06:23<00:03, 594kB/s]
+ 96%|#########5| 39.7M/41.5M [06:23<00:03, 611kB/s]
+ 96%|#########5| 39.8M/41.5M [06:23<00:02, 608kB/s]
+ 96%|#########6| 39.9M/41.5M [06:23<00:02, 607kB/s]
+ 96%|#########6| 40.0M/41.5M [06:23<00:02, 606kB/s]
+ 97%|#########6| 40.1M/41.5M [06:23<00:02, 619kB/s]
+ 97%|#########6| 40.2M/41.5M [06:24<00:02, 614kB/s]
+ 97%|#########7| 40.3M/41.5M [06:24<00:02, 611kB/s]
+ 97%|#########7| 40.4M/41.5M [06:24<00:01, 608kB/s]
+ 98%|#########7| 40.5M/41.5M [06:24<00:01, 607kB/s]
+ 98%|#########7| 40.6M/41.5M [06:24<00:01, 620kB/s]
+ 98%|#########8| 40.7M/41.5M [06:25<00:01, 615kB/s]
+ 98%|#########8| 40.8M/41.5M [06:25<00:01, 611kB/s]
+ 99%|#########8| 40.9M/41.5M [06:25<00:00, 623kB/s]
+ 99%|#########8| 41.0M/41.5M [06:25<00:00, 617kB/s]
+ 99%|#########9| 41.1M/41.5M [06:25<00:00, 613kB/s]
+ 99%|#########9| 41.2M/41.5M [06:25<00:00, 624kB/s]
+100%|#########9| 41.3M/41.5M [06:26<00:00, 512kB/s]
+100%|#########9| 41.4M/41.5M [06:26<00:00, 604kB/s]
+100%|##########| 41.5M/41.5M [06:26<00:00, 113kB/s]
</pre></div>
</div>
</div>
@@ -582,6 +2369,7 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
OneFlow top-1 id: 281, class name: tabby, tabby cat
</pre></div>
</div>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 6 minutes 50.106 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-oneflow-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/f7ae979fbe61064749ce0fb7a621eb4c/from_oneflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_oneflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_paddle.html b/docs/how_to/compile_models/from_paddle.html
index ac8dfbfd0..1303b736d 100644
--- a/docs/how_to/compile_models/from_paddle.html
+++ b/docs/how_to/compile_models/from_paddle.html
@@ -464,7 +464,7 @@ A quick solution is</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>TVM prediction top-1 id: 282, class name: 282: 'tiger cat',
</pre></div>
</div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 23.283 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 4.115 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-paddle-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/16269b77359771348d507395692524cf/from_paddle.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_paddle.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 803221adf..ec3b03922 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -387,10 +387,8 @@ be unstable.</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
0%| | 0.00/44.7M [00:00<?, ?B/s]
- 10%|# | 4.61M/44.7M [00:00<00:00, 48.2MB/s]
- 42%|####2 | 18.8M/44.7M [00:00<00:00, 107MB/s]
- 73%|#######3 | 32.8M/44.7M [00:00<00:00, 125MB/s]
-100%|##########| 44.7M/44.7M [00:00<00:00, 128MB/s]
+ 53%|#####2 | 23.6M/44.7M [00:00<00:00, 248MB/s]
+100%|##########| 44.7M/44.7M [00:00<00:00, 272MB/s]
</pre></div>
</div>
</div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 0d3420129..5dd595095 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -607,7 +607,6 @@ banana (score = 0.00022)
desk (score = 0.00019)
</pre></div>
</div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 2.150 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index a702d5bcf..d9a2fc933 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -300,18 +300,18 @@
<div class="section" id="computation-times">
<span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:35.207</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>11:30.048</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
<ul class="simple">
-<li><p><strong>01:23.283</strong>: <a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></li>
-<li><p><strong>01:02.150</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
-<li><p><strong>00:55.986</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
-<li><p><strong>00:31.428</strong>: <a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></li>
-<li><p><strong>00:24.838</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
-<li><p><strong>00:21.133</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
-<li><p><strong>00:20.815</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
-<li><p><strong>00:19.230</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
-<li><p><strong>00:13.473</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
-<li><p><strong>00:02.871</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
+<li><p><strong>06:50.106</strong>: <a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></li>
+<li><p><strong>01:04.115</strong>: <a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></li>
+<li><p><strong>00:59.156</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
+<li><p><strong>00:56.570</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
+<li><p><strong>00:24.524</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
+<li><p><strong>00:20.600</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
+<li><p><strong>00:20.526</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
+<li><p><strong>00:19.185</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
+<li><p><strong>00:12.821</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
+<li><p><strong>00:02.445</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
</ul>
</div>
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 5c8dd1de6..27e3e90fa 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -622,7 +622,7 @@ to the remote android device.</p>
Evaluate inference time cost...
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 16.1288 16.1018 16.3555 16.0681 0.0847
+ 15.8762 15.5255 16.7675 15.4631 0.4818
</pre></div>
</div>
</div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index 98e175eea..d2854c1b9 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -409,60 +409,13 @@ be unstable.</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
0%| | 0.00/170M [00:00<?, ?B/s]
- 1%|1 | 1.94M/170M [00:00<00:08, 20.0MB/s]
- 2%|2 | 3.84M/170M [00:00<00:09, 17.7MB/s]
- 4%|3 | 6.73M/170M [00:00<00:07, 23.1MB/s]
- 5%|5 | 9.01M/170M [00:00<00:07, 23.1MB/s]
- 7%|6 | 11.9M/170M [00:00<00:06, 25.4MB/s]
- 8%|8 | 14.3M/170M [00:00<00:06, 25.0MB/s]
- 10%|9 | 16.9M/170M [00:00<00:06, 25.6MB/s]
- 12%|#1 | 20.1M/170M [00:00<00:05, 28.0MB/s]
- 13%|#3 | 22.8M/170M [00:01<00:06, 22.9MB/s]
- 15%|#4 | 25.1M/170M [00:01<00:07, 21.3MB/s]
- 16%|#6 | 27.5M/170M [00:01<00:06, 22.0MB/s]
- 17%|#7 | 29.7M/170M [00:01<00:06, 21.7MB/s]
- 20%|#9 | 33.4M/170M [00:01<00:05, 26.2MB/s]
- 21%|##1 | 36.1M/170M [00:01<00:05, 27.0MB/s]
- 23%|##2 | 38.8M/170M [00:01<00:05, 26.5MB/s]
- 25%|##4 | 42.4M/170M [00:01<00:04, 29.4MB/s]
- 28%|##7 | 47.1M/170M [00:01<00:03, 35.2MB/s]
- 30%|##9 | 50.7M/170M [00:01<00:03, 35.5MB/s]
- 32%|###2 | 55.1M/170M [00:02<00:03, 38.8MB/s]
- 35%|###4 | 58.9M/170M [00:02<00:03, 35.9MB/s]
- 37%|###6 | 62.4M/170M [00:02<00:03, 31.1MB/s]
- 39%|###8 | 65.5M/170M [00:02<00:03, 31.3MB/s]
- 40%|#### | 68.5M/170M [00:02<00:03, 29.8MB/s]
- 42%|####2 | 72.1M/170M [00:02<00:03, 31.7MB/s]
- 44%|####4 | 75.2M/170M [00:02<00:03, 26.7MB/s]
- 46%|####5 | 77.9M/170M [00:03<00:04, 22.9MB/s]
- 48%|####8 | 82.3M/170M [00:03<00:03, 27.9MB/s]
- 51%|##### | 86.0M/170M [00:03<00:02, 30.6MB/s]
- 52%|#####2 | 89.2M/170M [00:03<00:03, 28.1MB/s]
- 55%|#####4 | 92.7M/170M [00:03<00:02, 29.5MB/s]
- 56%|#####6 | 95.6M/170M [00:03<00:03, 25.3MB/s]
- 58%|#####8 | 98.7M/170M [00:03<00:02, 26.4MB/s]
- 60%|#####9 | 102M/170M [00:03<00:02, 27.7MB/s]
- 62%|######2 | 106M/170M [00:03<00:02, 30.6MB/s]
- 65%|######4 | 110M/170M [00:04<00:01, 32.0MB/s]
- 67%|######6 | 113M/170M [00:04<00:01, 33.6MB/s]
- 69%|######8 | 117M/170M [00:04<00:01, 35.2MB/s]
- 71%|####### | 120M/170M [00:04<00:01, 35.2MB/s]
- 73%|#######3 | 125M/170M [00:04<00:01, 37.7MB/s]
- 76%|#######5 | 128M/170M [00:04<00:01, 29.8MB/s]
- 77%|#######7 | 131M/170M [00:04<00:01, 27.2MB/s]
- 80%|######## | 136M/170M [00:04<00:01, 33.1MB/s]
- 83%|########2 | 140M/170M [00:05<00:00, 34.5MB/s]
- 85%|########4 | 144M/170M [00:05<00:00, 32.5MB/s]
- 87%|########6 | 147M/170M [00:05<00:00, 27.3MB/s]
- 88%|########8 | 150M/170M [00:05<00:00, 25.0MB/s]
- 90%|########9 | 152M/170M [00:05<00:00, 25.0MB/s]
- 91%|#########1| 155M/170M [00:05<00:00, 24.2MB/s]
- 93%|#########2| 158M/170M [00:05<00:00, 26.2MB/s]
- 95%|#########4| 161M/170M [00:05<00:00, 24.7MB/s]
- 96%|#########5| 163M/170M [00:06<00:00, 24.3MB/s]
- 97%|#########7| 165M/170M [00:06<00:00, 24.0MB/s]
- 99%|#########8| 168M/170M [00:06<00:00, 22.8MB/s]
-100%|##########| 170M/170M [00:06<00:00, 27.9MB/s]
+ 11%|#1 | 19.3M/170M [00:00<00:00, 203MB/s]
+ 27%|##6 | 45.8M/170M [00:00<00:00, 246MB/s]
+ 42%|####2 | 72.1M/170M [00:00<00:00, 260MB/s]
+ 58%|#####7 | 98.3M/170M [00:00<00:00, 266MB/s]
+ 73%|#######3 | 124M/170M [00:00<00:00, 269MB/s]
+ 88%|########8 | 150M/170M [00:00<00:00, 269MB/s]
+100%|##########| 170M/170M [00:00<00:00, 264MB/s]
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
for i in range(dim)
/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -555,7 +508,7 @@ torchvision rcnn models.</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
</pre></div>
</div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes 10.453 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes 56.175 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index 49afbc178..9e75e1d6a 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -450,12 +450,7 @@ training. Other models require a full post training calibration.</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
0%| | 0.00/13.6M [00:00<?, ?B/s]
- 10%|# | 1.38M/13.6M [00:00<00:00, 14.5MB/s]
- 33%|###3 | 4.54M/13.6M [00:00<00:00, 25.4MB/s]
- 51%|#####1 | 6.97M/13.6M [00:00<00:00, 24.1MB/s]
- 73%|#######3 | 9.90M/13.6M [00:00<00:00, 25.3MB/s]
- 91%|######### | 12.3M/13.6M [00:00<00:00, 24.2MB/s]
-100%|##########| 13.6M/13.6M [00:00<00:00, 23.7MB/s]
+100%|##########| 13.6M/13.6M [00:00<00:00, 167MB/s]
</pre></div>
</div>
</div>
@@ -544,7 +539,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
<p class="sphx-glr-script-out">Out:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 90.2412 90.2071 90.8011 90.0909 0.1241
+ 90.1441 90.0327 91.7263 89.8729 0.2893
</pre></div>
</div>
<div class="admonition note">
@@ -583,7 +578,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
<div class="section" id="deploy-a-quantized-tflite-model">
<h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
<p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 5.816 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 3.230 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index bf826ceac..60e710a4d 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -540,7 +540,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
<p class="sphx-glr-script-out">Out:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
- 117.0151 116.9941 119.1223 115.3432 0.9252
+ 117.5831 117.4965 119.9891 116.6112 0.6789
</pre></div>
</div>
<div class="admonition note">
@@ -568,7 +568,7 @@ network for ARM CPU</span></a>.</p></li>
</ul>
</div></blockquote>
</div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 58.607 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 55.881 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 7887de8ac..9377f3719 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -480,7 +480,7 @@ for calibration. But the accuracy might be impacted.</p>
DeprecationWarning,
</pre></div>
</div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes 21.769 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 7.308 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 3a6069fe6..bb1d68eb5 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -415,22 +415,22 @@ to your device.</p>
Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
0%| | 0/132723 [00:00<?, ?KB/s]
- 5%|5 | 6999/132723 [00:00<00:01, 69969.38KB/s]
- 12%|#1 | 15727/132723 [00:00<00:01, 80147.99KB/s]
- 19%|#8 | 24560/132723 [00:00<00:01, 83881.04KB/s]
- 25%|##4 | 32949/132723 [00:00<00:01, 80221.28KB/s]
- 31%|###1 | 41794/132723 [00:00<00:01, 83102.74KB/s]
- 38%|###8 | 50653/132723 [00:00<00:00, 84931.94KB/s]
- 45%|####4 | 59479/132723 [00:00<00:00, 86003.43KB/s]
- 52%|#####1 | 68362/132723 [00:00<00:00, 86889.43KB/s]
- 58%|#####8 | 77304/132723 [00:00<00:00, 87674.86KB/s]
- 65%|######4 | 86187/132723 [00:01<00:00, 88027.99KB/s]
- 72%|#######1 | 95042/132723 [00:01<00:00, 88183.97KB/s]
- 78%|#######8 | 103982/132723 [00:01<00:00, 88551.68KB/s]
- 85%|########5 | 112918/132723 [00:01<00:00, 88793.65KB/s]
- 92%|#########1| 121800/132723 [00:01<00:00, 88484.18KB/s]
- 98%|#########8| 130681/132723 [00:01<00:00, 88579.50KB/s]
-100%|##########| 132723/132723 [00:01<00:00, 86307.82KB/s]
+ 5%|5 | 6757/132723 [00:00<00:01, 67559.27KB/s]
+ 12%|#1 | 15560/132723 [00:00<00:01, 79595.33KB/s]
+ 18%|#8 | 24463/132723 [00:00<00:01, 83901.77KB/s]
+ 25%|##5 | 33310/132723 [00:00<00:01, 85702.78KB/s]
+ 32%|###1 | 42202/132723 [00:00<00:01, 86861.86KB/s]
+ 38%|###8 | 51061/132723 [00:00<00:00, 87447.02KB/s]
+ 45%|####5 | 59862/132723 [00:00<00:00, 87630.07KB/s]
+ 52%|#####1 | 68626/132723 [00:00<00:00, 87538.91KB/s]
+ 58%|#####8 | 77439/132723 [00:00<00:00, 87717.76KB/s]
+ 65%|######4 | 86267/132723 [00:01<00:00, 87889.71KB/s]
+ 72%|#######1 | 95173/132723 [00:01<00:00, 88246.73KB/s]
+ 78%|#######8 | 104014/132723 [00:01<00:00, 88287.51KB/s]
+ 85%|########5 | 112862/132723 [00:01<00:00, 88340.22KB/s]
+ 92%|#########1| 121697/132723 [00:01<00:00, 88298.19KB/s]
+ 98%|#########8| 130598/132723 [00:01<00:00, 88509.55KB/s]
+100%|##########| 132723/132723 [00:01<00:00, 86935.39KB/s]
</pre></div>
</div>
<p>Create TVM runtime and do inference
@@ -470,7 +470,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
</pre></div>
</div>
<img alt="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" />
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes 17.852 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes 17.619 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 6c6daf89e..21e75e40f 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -300,16 +300,16 @@
<div class="section" id="computation-times">
<span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>11:45.672</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>10:09.203</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
<ul class="simple">
-<li><p><strong>03:10.453</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
-<li><p><strong>02:21.769</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
-<li><p><strong>02:17.852</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
-<li><p><strong>01:58.607</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
-<li><p><strong>01:05.816</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
-<li><p><strong>00:28.562</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
-<li><p><strong>00:22.433</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
-<li><p><strong>00:00.181</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
+<li><p><strong>02:56.175</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
+<li><p><strong>02:17.619</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
+<li><p><strong>01:55.881</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
+<li><p><strong>01:07.308</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
+<li><p><strong>01:03.230</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
+<li><p><strong>00:27.226</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
+<li><p><strong>00:21.587</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
+<li><p><strong>00:00.176</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
</ul>
</div>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 8cc08a226..9fa9de663 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -588,7 +588,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip094144d5-fdd2-4b2b-9231-75f8c8ece3c7 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip7d4ccf53-5f97-4de0-9c7a-502154829388 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
</pre></div>
</div>
<p>It’s easy to execute MobileNet with native TVM:</p>
@@ -650,7 +650,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Check failed: (lower) is false: Intrinsic lowering function for target llvm, intrinsic name tir.sqrt, type 150 not found
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Check failed: (lower) is false: FloatImm lowering function for target llvm type 150 not found
</pre></div>
</div>
<p>When we attempt to run the model, we get a familiar error telling us that more functions need to be registerd for myfloat.</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index efcaea7a2..55aa1da7c 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -300,12 +300,12 @@
<div class="section" id="computation-times">
<span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:37.308</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:36.948</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
<ul class="simple">
-<li><p><strong>00:33.934</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
-<li><p><strong>00:02.178</strong>: <a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></li>
-<li><p><strong>00:01.008</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
-<li><p><strong>00:00.187</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
+<li><p><strong>00:33.587</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
+<li><p><strong>00:02.171</strong>: <a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></li>
+<li><p><strong>00:01.009</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
+<li><p><strong>00:00.181</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
</ul>
</div>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index c0945d6ae..6d1d3a07e 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -486,10 +486,10 @@ profile the execution time of each passes.</p>
</div>
<p class="sphx-glr-script-out">Out:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 5844us [5844us] (45.16%; 45.16%)
-FoldScaleAxis: 7098us [2us] (54.84%; 54.84%)
- FoldConstant: 7096us [1481us] (54.83%; 99.97%)
- InferType: 5615us [5615us] (43.39%; 79.13%)
+InferType: 6108us [6108us] (45.64%; 45.64%)
+FoldScaleAxis: 7276us [2us] (54.36%; 54.36%)
+ FoldConstant: 7273us [1522us] (54.35%; 99.97%)
+ InferType: 5751us [5751us] (42.97%; 79.07%)
</pre></div>
</div>
</div>
@@ -512,10 +512,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
</div>
<p class="sphx-glr-script-out">Out:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 5678us [5678us] (44.65%; 44.65%)
-FoldScaleAxis: 7040us [2us] (55.35%; 55.35%)
- FoldConstant: 7038us [1455us] (55.34%; 99.98%)
- InferType: 5583us [5583us] (43.90%; 79.32%)
+InferType: 5810us [5810us] (44.54%; 44.54%)
+FoldScaleAxis: 7233us [2us] (55.46%; 55.46%)
+ FoldConstant: 7232us [1511us] (55.44%; 99.98%)
+ InferType: 5721us [5721us] (43.86%; 79.11%)
</pre></div>
</div>
<p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index 3ef397bab..31ea9939b 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -534,7 +534,7 @@ latency of convolution.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 35.339016 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 40.653210 ms
</pre></div>
</div>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 7b88b6a14..b76083088 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -878,7 +878,7 @@ be able to run on our build server</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 8.956960 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 10.205289 ms
</pre></div>
</div>
</div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index b569b3fa5..5a584683e 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -431,8 +431,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018245
-Baseline: 3.306084
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.017612
+Baseline: 3.390872
</pre></div>
</div>
<p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -494,7 +494,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.289051
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.307004
</pre></div>
</div>
<p>Here is the generated IR after blocking.</p>
@@ -563,7 +563,7 @@ vastly.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.329916
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.338097
</pre></div>
</div>
<p>Here is the generated IR after vectorization.</p>
@@ -626,7 +626,7 @@ the access pattern for A matrix is more cache friendly.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.117861
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.112429
</pre></div>
</div>
<p>Here is the generated IR after loop permutation.</p>
@@ -711,7 +711,7 @@ flattening.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110710
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110156
</pre></div>
</div>
<p>Here is the generated IR after array packing.</p>
@@ -799,7 +799,7 @@ write to C when all the block results are ready.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111893
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111282
</pre></div>
</div>
<p>Here is the generated IR after blocking.</p>
@@ -891,7 +891,7 @@ write to C when all the block results are ready.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.146405
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.144558
</pre></div>
</div>
<p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index 6f84b9684..dd0b72136 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -300,11 +300,11 @@
<div class="section" id="computation-times">
<span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:34.502</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:34.835</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
<ul class="simple">
-<li><p><strong>00:31.909</strong>: <a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></li>
-<li><p><strong>00:01.424</strong>: <a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></li>
-<li><p><strong>00:01.169</strong>: <a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></li>
+<li><p><strong>00:32.206</strong>: <a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></li>
+<li><p><strong>00:01.432</strong>: <a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></li>
+<li><p><strong>00:01.198</strong>: <a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></li>
</ul>
</div>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index 1fc5c3894..0b282aed0 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -300,14 +300,14 @@
<div class="section" id="computation-times">
<span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:00.449</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>04:50.844</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
<ul class="simple">
-<li><p><strong>02:28.084</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
-<li><p><strong>01:18.796</strong>: <a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></li>
-<li><p><strong>00:39.963</strong>: <a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></li>
-<li><p><strong>00:17.032</strong>: <a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></li>
-<li><p><strong>00:08.373</strong>: <a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></li>
-<li><p><strong>00:08.201</strong>: <a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></li>
+<li><p><strong>02:19.891</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
+<li><p><strong>01:17.690</strong>: <a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></li>
+<li><p><strong>00:39.503</strong>: <a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></li>
+<li><p><strong>00:16.807</strong>: <a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></li>
+<li><p><strong>00:08.768</strong>: <a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></li>
+<li><p><strong>00:08.185</strong>: <a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></li>
</ul>
</div>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index 3a12ddb02..e5d56c3f0 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -471,11 +471,11 @@ cooperative fetching, unrolling and operator fusion.</p>
buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 32;
- allocate(conv2d_nchw: Pointer(local float32), float32, [16]), storage_scope = local;
- allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
- allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
- attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
- conv2d_nchw_1: Buffer(conv2d_nchw, float32, [16], [], scope="local", align=64)[0] = 0f32
+ allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+ allocate(pad_temp.shared: Pointer(shared float32), float32, [1568]), storage_scope = shared;
+ allocate(kernel.shared: Pointer(shared float32), float32, [512]), storage_scope = shared;
+ attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+ conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
conv2d_nchw_1[1] = 0f32
conv2d_nchw_1[2] = 0f32
conv2d_nchw_1[3] = 0f32
@@ -489,943 +489,74 @@ cooperative fetching, unrolling and operator fusion.</p>
conv2d_nchw_1[11] = 0f32
conv2d_nchw_1[12] = 0f32
conv2d_nchw_1[13] = 0f32
- conv2d_nchw_1[14] = 0f32
- conv2d_nchw_1[15] = 0f32
for (rc.outer.outer: int32, 0, 16) {
- for (rx.outer.outer: int32, 0, 3) {
- let cse_var_2: int32 = (rc.outer.outer*1568)
- let cse_var_1: int32 = (rc.outer.outer*288)
- {
- attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 49)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 7), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(thre [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 14), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(thr [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 147)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 21), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 28), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 245)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 35), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 42), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 343)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 49), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 56), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 441)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 335)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 70), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 539)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 77), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 84), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 637)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 91), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 98), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(th [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 735)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 105), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(t [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 112), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(t [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 833)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 119), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 678)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 931)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 133), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(t [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 140), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(t [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1029)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 147), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 154), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1127)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 161), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 168), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1225)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 175), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 182), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1323)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1021)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 196), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1421)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 203), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 210), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1519)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 217), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 224), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1617)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 231), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 238), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1715)] = @tir.if_then_else((((floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 245), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 7)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) + 1364)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1813)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 259), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 266), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1911)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 273), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod( [...]
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((cse_var_2 + (floordiv((floordiv(threadIdx.x_1, 7) + 280), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 1), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
- attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- if @tir.likely((threadIdx.x_1 < 7), dtype=bool) {
- pad_temp.shared_1[(threadIdx.x_1 + 2009)] = 0f32
- }
- attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope="shared")[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 49)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 49), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 49), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 98)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 98), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 2), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 147)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 147), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 51), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 196)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 196), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 4), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 245)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 245), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 53), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 294)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 294), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 6), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 343)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 343), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 55), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 392), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 8), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 441)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 441), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 57), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 490)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 490), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 10), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 539)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 539), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 59), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 588)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 588), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 12), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 637)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 637), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 61), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 686)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 686), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 14), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 735)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 735), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 63), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 16), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 833)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 833), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 65), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 882)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 882), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 18), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 931)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 931), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 67), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 980)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 980), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 20), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1029)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1029), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 69), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1078)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1078), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 22), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1127)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1127), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 71), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1176), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 24), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1225)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1225), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 73), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1274)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1274), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 26), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1323)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1323), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 75), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1372)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1372), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 28), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1421)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1421), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 77), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- kernel.shared_1[(threadIdx.x_2 + 1470)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1470), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 30), 96)*3)) + rx.outer.outer)]
- attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
- if @tir.likely((threadIdx.x_2 < 17), dtype=bool) {
- kernel.shared_1[(threadIdx.x_2 + 1519)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1519), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 79), 96)*3)) + rx.outer.outer)]
- }
- for (rc.outer.inner: int32, 0, 2) {
- let cse_var_3: int32 = (rc.outer.inner*48)
- {
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[cse_var_3]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 96)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 192)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 288)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 97)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 193)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 289)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 2)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 98)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 194)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 290)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 3)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 99)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 195)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 291)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 4)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 100)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 196)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 292)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 5)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 101)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 197)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 293)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 6)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 102)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 198)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 294)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 7)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 103)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 199)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 295)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 8)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 104)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 200)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 296)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 9)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 105)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 201)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 297)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 10)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 106)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 202)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 298)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 11)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 107)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 203)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 299)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 12)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 108)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 204)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 300)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 13)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 109)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 205)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 301)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 14)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 110)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 206)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 302)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 15)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 111)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 207)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 303)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 16)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 112)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 208)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 304)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 17)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 113)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 209)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 305)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 18)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 114)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 210)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 306)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 19)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 115)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 211)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 307)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 20)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 116)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 212)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 308)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 21)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 117)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 213)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 309)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 22)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 118)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 214)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 310)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 23)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 119)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 215)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 311)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 24)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 120)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 216)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 312)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 25)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 121)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 217)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 313)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 26)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 122)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 218)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 314)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 27)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 123)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 219)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 315)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 28)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 124)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 220)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 316)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 29)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 125)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 221)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 317)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 30)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 126)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 222)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 318)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 31)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 127)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 223)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 319)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 32)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 128)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 224)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 320)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 33)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 129)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 225)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 321)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 34)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 130)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 226)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 322)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 35)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 131)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 227)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 323)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 36)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 132)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 228)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 324)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 37)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 133)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 229)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 325)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 38)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 134)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 230)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 326)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 39)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 135)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 231)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 327)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 40)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 136)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 232)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 328)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 41)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 137)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 233)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 329)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 42)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 138)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 234)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 330)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 43)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 139)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 235)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 331)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 44)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 140)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 236)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 332)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 45)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 141)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 237)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 333)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 46)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 142)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 238)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 334)]))
- conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 47)]))
- conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 143)]))
- conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 239)]))
- conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 335)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 384)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 480)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 576)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 672)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 385)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 481)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 577)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 673)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 386)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 482)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 578)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 674)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 387)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 483)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 579)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 675)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 388)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 484)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 580)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 676)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 389)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 485)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 581)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 677)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 390)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 486)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 582)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 678)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 391)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 487)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 583)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 679)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 392)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 488)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 584)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 680)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 393)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 489)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 585)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 681)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 394)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 490)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 586)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 682)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 395)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 491)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 587)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 683)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 396)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 492)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 588)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 684)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 397)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 493)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 589)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 685)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 398)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 494)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 590)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 686)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 399)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 495)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 591)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 687)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 400)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 496)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 592)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 688)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 401)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 497)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 593)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 689)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 402)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 498)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 594)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 690)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 403)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 499)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 595)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 691)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 404)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 500)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 596)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 692)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 405)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 501)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 597)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 693)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 406)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 502)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 598)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 694)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 407)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 503)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 599)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 695)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 408)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 504)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 600)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 696)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 409)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 505)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 601)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 697)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 410)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 506)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 602)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 698)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 411)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 507)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 603)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 699)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 412)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 508)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 604)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 700)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 413)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 509)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 605)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 701)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 414)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 510)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 606)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 702)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 415)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 511)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 607)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 703)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 416)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 512)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 608)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 704)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 417)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 513)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 609)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 705)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 418)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 514)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 610)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 706)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 419)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 515)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 611)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 707)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 420)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 516)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 612)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 708)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 421)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 517)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 613)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 709)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 422)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 518)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 614)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 710)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 423)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 519)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 615)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 711)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 424)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 520)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 616)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 712)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 425)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 521)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 617)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 713)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 426)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 522)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 618)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 714)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 427)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 523)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 619)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 715)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 428)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 524)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 620)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 716)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 429)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 525)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 621)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 717)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 430)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 526)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 622)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 718)]))
- conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 431)]))
- conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 527)]))
- conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 623)]))
- conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 719)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 768)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 864)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 960)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1056)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 769)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 865)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 961)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1057)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 770)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 866)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 962)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1058)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 771)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 867)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 963)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1059)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 772)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 868)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 964)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1060)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 773)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 869)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 965)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1061)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 774)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 870)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 966)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1062)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 775)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 871)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 967)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1063)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 776)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 872)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 968)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1064)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 777)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 873)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 969)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1065)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 778)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 874)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 970)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1066)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 779)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 875)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 971)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1067)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 780)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 876)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 972)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1068)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 781)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 877)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 973)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1069)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 782)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 878)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 974)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1070)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 783)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 879)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 975)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1071)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 784)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 880)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 976)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1072)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 785)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 881)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 977)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1073)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 786)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 882)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 978)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1074)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 787)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 883)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 979)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1075)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 788)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 884)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 980)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1076)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 789)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 885)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 981)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1077)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 790)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 886)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 982)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1078)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 791)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 887)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 983)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1079)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 792)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 888)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 984)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1080)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 793)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 889)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 985)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1081)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 794)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 890)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 986)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1082)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 795)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 891)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 987)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1083)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 796)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 892)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 988)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1084)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 797)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 893)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 989)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1085)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 798)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 894)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 990)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1086)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 799)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 895)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 991)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1087)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 800)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 896)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 992)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1088)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 801)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 897)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 993)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1089)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 802)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 898)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 994)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1090)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 803)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 899)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 995)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1091)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 804)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 900)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 996)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1092)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 805)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 901)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 997)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1093)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 806)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 902)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 998)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1094)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 807)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 903)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 999)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1095)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 808)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 904)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1000)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1096)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 809)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 905)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1001)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1097)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 810)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 906)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1002)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1098)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 811)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 907)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1003)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1099)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 812)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 908)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1004)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1100)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 813)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 909)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1005)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1101)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 814)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 910)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1006)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1102)]))
- conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 815)]))
- conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 911)]))
- conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1007)]))
- conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1103)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1152)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1248)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1344)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[((rc.outer.inner*1008) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 1440)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1153)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1249)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1345)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1441)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1154)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1250)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1346)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 1442)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1155)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1251)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1347)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 1443)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1156)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1252)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1348)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 1444)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1157)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1253)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1349)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 1445)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1158)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1254)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1350)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 1446)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1159)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1255)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1351)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 1447)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1160)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1256)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1352)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 1448)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1161)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1257)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1353)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 1449)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1162)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1258)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1354)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 1450)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1163)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1259)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1355)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 1451)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1164)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1260)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1356)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 252)]*kernel.shared_1[(cse_var_3 + 1452)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1165)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1261)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1357)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 259)]*kernel.shared_1[(cse_var_3 + 1453)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1166)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1262)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1358)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 266)]*kernel.shared_1[(cse_var_3 + 1454)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1167)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1263)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1359)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 315)]*kernel.shared_1[(cse_var_3 + 1455)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1168)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1264)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1360)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 322)]*kernel.shared_1[(cse_var_3 + 1456)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1169)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1265)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1361)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 329)]*kernel.shared_1[(cse_var_3 + 1457)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1170)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1266)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1362)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 378)]*kernel.shared_1[(cse_var_3 + 1458)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1171)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1267)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1363)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 385)]*kernel.shared_1[(cse_var_3 + 1459)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1172)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1268)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1364)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 392)]*kernel.shared_1[(cse_var_3 + 1460)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1173)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1269)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1365)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 441)]*kernel.shared_1[(cse_var_3 + 1461)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1174)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1270)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1366)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 448)]*kernel.shared_1[(cse_var_3 + 1462)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1175)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1271)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1367)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 455)]*kernel.shared_1[(cse_var_3 + 1463)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1176)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1272)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1368)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 504)]*kernel.shared_1[(cse_var_3 + 1464)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1177)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1273)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1369)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 511)]*kernel.shared_1[(cse_var_3 + 1465)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1178)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1274)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1370)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 518)]*kernel.shared_1[(cse_var_3 + 1466)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1179)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1275)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1371)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 567)]*kernel.shared_1[(cse_var_3 + 1467)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1180)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1276)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1372)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 574)]*kernel.shared_1[(cse_var_3 + 1468)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1181)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1277)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1373)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 581)]*kernel.shared_1[(cse_var_3 + 1469)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1182)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1278)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1374)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 630)]*kernel.shared_1[(cse_var_3 + 1470)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1183)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1279)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1375)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 637)]*kernel.shared_1[(cse_var_3 + 1471)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1184)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1280)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1376)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 644)]*kernel.shared_1[(cse_var_3 + 1472)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1185)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1281)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1377)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 693)]*kernel.shared_1[(cse_var_3 + 1473)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1186)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1282)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1378)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 700)]*kernel.shared_1[(cse_var_3 + 1474)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1187)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1283)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1379)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 707)]*kernel.shared_1[(cse_var_3 + 1475)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1188)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1284)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1380)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 756)]*kernel.shared_1[(cse_var_3 + 1476)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1189)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1285)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1381)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 763)]*kernel.shared_1[(cse_var_3 + 1477)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1190)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1286)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1382)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 770)]*kernel.shared_1[(cse_var_3 + 1478)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1191)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1287)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1383)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 819)]*kernel.shared_1[(cse_var_3 + 1479)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1192)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1288)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1384)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 826)]*kernel.shared_1[(cse_var_3 + 1480)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1193)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1289)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1385)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 833)]*kernel.shared_1[(cse_var_3 + 1481)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1194)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1290)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1386)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 882)]*kernel.shared_1[(cse_var_3 + 1482)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1195)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1291)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1387)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 889)]*kernel.shared_1[(cse_var_3 + 1483)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1196)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1292)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1388)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 896)]*kernel.shared_1[(cse_var_3 + 1484)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1197)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1293)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1389)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 945)]*kernel.shared_1[(cse_var_3 + 1485)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1198)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1294)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1390)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 952)]*kernel.shared_1[(cse_var_3 + 1486)]))
- conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1199)]))
- conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1295)]))
- conv2d_nchw_1[14] = (conv2d_nchw_1[14] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1391)]))
- conv2d_nchw_1[15] = (conv2d_nchw_1[15] + (pad_temp.shared_1[(((rc.outer.inner*1008) + threadIdx.x) + 959)]*kernel.shared_1[(cse_var_3 + 1487)]))
+ for (ry.outer.outer: int32, 0, 3) {
+ for (rx.outer.outer: int32, 0, 3) {
+ let cse_var_2: int32 = (rc.outer.outer*288)
+ let cse_var_1: int32 = (ry.outer.outer*3)
+ {
+ for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 28) {
+ let cse_var_3: int32 = (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56)
+ attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ pad_temp.shared_1: Buffer(pad_temp.shared, float32, [1568], [], scope="shared")[(cse_var_3 + threadIdx.x_1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7))) && ((ry.outer.outer + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*8) + floordiv(threadIdx.x_1, 7)), 7)) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && [...]
+ }
+ attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1: Buffer(kernel.shared, float32, [512], [], scope="shared")[threadIdx.x_2] = kernel[((((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 32)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 7), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 14), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 21), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 32256)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 35), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 42), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 16), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*73728) + (floordiv((floordiv(threadIdx.x_2, 8) + 49), 4)*4608)) + cse_var_2) + (floormod((threadIdx.x_2 + 8), 32)*9)) + cse_var_1) + rx.outer.outer)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[(((((((blockIdx.x*73728) + (floordiv(floordiv(threadIdx.x_2, 8), 4)*4608)) + cse_var_2) + (floormod(threadIdx.x_2, 32)*9)) + cse_var_1) + rx.outer.outer) + 64512)]
+ attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+ if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
+ kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((((blockIdx.x*73728) + cse_var_2) + (floormod((threadIdx.x_2 + 24), 32)*9)) + cse_var_1) + rx.outer.outer) + 69120)]
+ }
+ for (rc.outer.inner: int32, 0, 16) {
+ for (ff.outer.inner: int32, 0, 2) {
+ let cse_var_10: int32 = (ff.outer.inner*7)
+ let cse_var_9: int32 = (cse_var_10 + 6)
+ let cse_var_8: int32 = (cse_var_10 + 5)
+ let cse_var_7: int32 = (cse_var_10 + 4)
+ let cse_var_6: int32 = (cse_var_10 + 3)
+ let cse_var_5: int32 = (cse_var_10 + 2)
+ let cse_var_4: int32 = (cse_var_10 + 1)
+ {
+ conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2))]))
+ conv2d_nchw_1[cse_var_10] = (conv2d_nchw_1[cse_var_10] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 49)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_4] = (conv2d_nchw_1[cse_var_4] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 50)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_5] = (conv2d_nchw_1[cse_var_5] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 51)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_6] = (conv2d_nchw_1[cse_var_6] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 52)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_7] = (conv2d_nchw_1[cse_var_7] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 53)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_8] = (conv2d_nchw_1[cse_var_8] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 54)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ conv2d_nchw_1[cse_var_9] = (conv2d_nchw_1[cse_var_9] + (pad_temp.shared_1[(((rc.outer.inner*98) + (floormod(threadIdx.x, 7)*7)) + 55)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*64) + (ff.outer.inner*32)) + (rc.outer.inner*2)) + 1)]))
+ }
+ }
}
}
}
}
}
- for (i1.inner: int32, 0, 16) {
- compute[(((blockIdx.x*784) + (i1.inner*49)) + threadIdx.x)] = max((conv2d_nchw_1[i1.inner] + bias[((blockIdx.x*16) + i1.inner)]), 0f32)
+ for (i1.inner: int32, 0, 2) {
+ for (i3.inner: int32, 0, 7) {
+ compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+ }
}
}
}
@@ -1463,7 +594,7 @@ cooperative fetching, unrolling and operator fusion.</p>
</pre></div>
</div>
<p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.229 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.330 ms
</pre></div>
</div>
</div>
@@ -1493,21 +624,21 @@ conv2d_nchw_nn_o_i, conv2d_nchw_nn_i = s[conv2d_nchw].split(conv2d_nchw_nn, fact
conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=4)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=1)
+conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
-conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=16)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
-conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
+conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
@@ -1515,14 +646,14 @@ s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nc
compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=16)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=1)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1542,14 +673,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 1024)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 16)
s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
CUDA source code:
@@ -1567,10 +698,10 @@ CUDA source code:
#define int64_t long long
#define uint64_t unsigned long long
#endif
-extern "C" __global__ void __launch_bounds__(49) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
- float conv2d_nchw[16];
- __shared__ float pad_temp_shared[2016];
- __shared__ float kernel_shared[1536];
+extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+ float conv2d_nchw[14];
+ __shared__ float pad_temp_shared[1568];
+ __shared__ float kernel_shared[512];
conv2d_nchw[0] = 0.000000e+00f;
conv2d_nchw[1] = 0.000000e+00f;
conv2d_nchw[2] = 0.000000e+00f;
@@ -1585,864 +716,51 @@ extern "C" __global__ void __launch_bounds__(49) default_function_kern
conv2d_nchw[11] = 0.000000e+00f;
conv2d_nchw[12] = 0.000000e+00f;
conv2d_nchw[13] = 0.000000e+00f;
- conv2d_nchw[14] = 0.000000e+00f;
- conv2d_nchw[15] = 0.000000e+00f;
for (int rc_outer_outer = 0; rc_outer_outer < 16; ++rc_outer_outer) {
- for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
- __syncthreads();
- pad_temp_shared[((int)threadIdx.x)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 49)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 49) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 98) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 147)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 147) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 196) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 245)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 245) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 294) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 343)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 343) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 392)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 392) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 441)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 335)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 490) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 539)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 539) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 588) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 637)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 637) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 686) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 735)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 735) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 784) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 833)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 833) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 882)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 678)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 931)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 931) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 980) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1029)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1029) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1078) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1127)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1127) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1176) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1225)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1225) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1274)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1274) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1323)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1021)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1372) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1421)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1421) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1470) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1519)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1519) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1568) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1617)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1617) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1666) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1715)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1715) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 2) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1764)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1364)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1813)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1813) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1862) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1911)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1911) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1960) / 63) * 49)) + (((((int)threadIdx.x) / 7) + 1) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
- if (((int)threadIdx.x) < 7) {
- pad_temp_shared[(((int)threadIdx.x) + 2009)] = 0.000000e+00f;
- }
- kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 49)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 49) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 49) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 98)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 98) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 2) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 147)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 147) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 51) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 196)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 196) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 4) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 245)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 245) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 53) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 294)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 294) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 6) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 343)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 343) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 55) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 392)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 8) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 441)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 441) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 57) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 490)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 490) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 10) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 539)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 539) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 59) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 588)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 588) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 12) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 637)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 637) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 61) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 686)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 686) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 14) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 735)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 735) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 63) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 784)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 16) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 833)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 833) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 65) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 882)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 882) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 18) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 931)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 931) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 67) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 980)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 980) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 20) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1029)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1029) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 69) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1078)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1078) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 22) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1127)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1127) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 71) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1176) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 24) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1225)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1225) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 73) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1274)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1274) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 26) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1323)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1323) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 75) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1372)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1372) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 28) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1421)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1421) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 77) % 96) * 3)) + rx_outer_outer)];
- kernel_shared[(((int)threadIdx.x) + 1470)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1470) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 30) * 3)) + rx_outer_outer)];
- if (((int)threadIdx.x) < 17) {
- kernel_shared[(((int)threadIdx.x) + 1519)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1519) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 79) * 3)) + rx_outer_outer)];
- }
- __syncthreads();
- for (int rc_outer_inner = 0; rc_outer_inner < 2; ++rc_outer_inner) {
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[(rc_outer_inner * 48)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 96)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 192)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 288)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 97)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 193)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 289)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 2)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 98)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 194)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 290)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 3)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 99)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 195)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 291)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 4)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 100)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 196)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 292)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 5)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 101)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 197)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 293)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 6)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 102)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 198)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 294)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 7)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 103)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 199)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 295)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 8)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 104)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 200)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 296)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 9)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 105)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 201)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 297)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 10)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 106)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 202)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 298)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 11)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 107)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 203)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 299)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 12)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 108)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 204)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 300)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 13)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 109)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 205)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 301)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 14)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 110)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 206)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 302)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 15)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 111)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 207)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 303)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 16)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 112)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 208)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 304)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 17)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 113)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 209)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 305)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 18)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 114)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 210)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 306)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 19)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 115)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 211)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 307)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 20)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 116)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 212)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 308)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 21)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 117)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 213)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 309)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 22)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 118)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 214)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 310)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 23)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 119)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 215)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 311)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 24)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 120)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 216)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 312)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 25)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 121)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 217)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 313)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 26)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 122)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 218)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 314)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 27)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 123)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 219)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 315)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 28)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 124)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 220)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 316)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 29)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 125)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 221)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 317)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 30)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 126)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 222)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 318)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 31)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 127)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 223)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 319)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 32)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 128)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 224)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 320)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 33)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 129)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 225)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 321)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 34)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 130)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 226)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 322)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 35)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 131)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 227)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 323)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 36)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 132)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 228)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 324)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 37)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 133)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 229)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 325)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 38)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 134)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 230)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 326)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 39)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 135)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 231)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 327)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 40)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 136)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 232)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 328)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 41)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 137)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 233)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 329)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 42)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 138)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 234)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 330)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 43)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 139)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 235)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 331)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 44)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 140)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 236)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 332)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 45)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 141)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 237)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 333)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 46)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 142)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 238)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 334)]));
- conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 47)]));
- conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 143)]));
- conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 239)]));
- conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 335)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 384)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 480)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 576)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 672)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 385)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 481)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 577)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 673)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 386)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 482)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 578)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 674)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 387)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 483)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 579)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 675)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 388)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 484)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 580)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 676)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 389)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 485)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 581)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 677)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 390)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 486)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 582)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 678)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 391)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 487)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 583)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 679)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 392)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 488)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 584)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 680)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 393)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 489)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 585)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 681)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 394)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 490)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 586)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 682)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 395)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 491)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 587)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 683)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 396)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 492)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 588)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 684)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 397)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 493)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 589)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 685)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 398)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 494)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 590)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 686)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 399)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 495)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 591)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 687)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 400)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 496)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 592)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 688)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 401)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 497)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 593)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 689)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 402)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 498)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 594)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 690)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 403)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 499)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 595)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 691)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 404)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 500)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 596)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 692)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 405)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 501)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 597)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 693)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 406)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 502)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 598)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 694)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 407)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 503)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 599)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 695)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 408)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 504)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 600)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 696)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 409)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 505)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 601)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 697)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 410)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 506)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 602)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 698)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 411)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 507)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 603)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 699)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 412)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 508)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 604)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 700)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 413)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 509)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 605)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 701)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 414)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 510)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 606)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 630)] * kernel_shared[((rc_outer_inner * 48) + 702)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 415)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 511)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 607)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 637)] * kernel_shared[((rc_outer_inner * 48) + 703)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 416)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 512)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 608)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 644)] * kernel_shared[((rc_outer_inner * 48) + 704)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 417)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 513)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 609)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 693)] * kernel_shared[((rc_outer_inner * 48) + 705)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 418)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 514)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 610)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 700)] * kernel_shared[((rc_outer_inner * 48) + 706)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 419)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 515)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 611)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 707)] * kernel_shared[((rc_outer_inner * 48) + 707)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 420)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 516)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 612)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 756)] * kernel_shared[((rc_outer_inner * 48) + 708)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 421)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 517)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 613)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 763)] * kernel_shared[((rc_outer_inner * 48) + 709)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 422)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 518)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 614)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 770)] * kernel_shared[((rc_outer_inner * 48) + 710)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 423)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 519)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 615)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 819)] * kernel_shared[((rc_outer_inner * 48) + 711)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 424)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 520)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 616)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 826)] * kernel_shared[((rc_outer_inner * 48) + 712)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 425)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 521)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 617)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 833)] * kernel_shared[((rc_outer_inner * 48) + 713)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 426)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 522)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 618)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 882)] * kernel_shared[((rc_outer_inner * 48) + 714)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 427)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 523)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 619)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 889)] * kernel_shared[((rc_outer_inner * 48) + 715)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 428)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 524)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 620)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 896)] * kernel_shared[((rc_outer_inner * 48) + 716)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 429)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 525)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 621)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 945)] * kernel_shared[((rc_outer_inner * 48) + 717)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 430)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 526)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 622)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 952)] * kernel_shared[((rc_outer_inner * 48) + 718)]));
- conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 431)]));
- conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 527)]));
- conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 623)]));
- conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 959)] * kernel_shared[((rc_outer_inner * 48) + 719)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 768)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 864)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 960)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[((rc_outer_inner * 1008) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 48) + 1056)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 769)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 865)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 961)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 48) + 1057)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 770)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 866)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 962)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 48) + 1058)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 771)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 867)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 963)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 48) + 1059)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 772)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 868)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 964)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 48) + 1060)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 773)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 869)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 965)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 48) + 1061)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 774)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 870)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 966)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 48) + 1062)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 775)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 871)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 967)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 48) + 1063)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 776)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 872)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 968)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 48) + 1064)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 777)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 873)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 969)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 48) + 1065)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 778)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 874)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 970)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 48) + 1066)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 779)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 875)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 971)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 48) + 1067)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 780)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 876)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 972)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 252)] * kernel_shared[((rc_outer_inner * 48) + 1068)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 781)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 877)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 973)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 259)] * kernel_shared[((rc_outer_inner * 48) + 1069)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 782)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 878)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 974)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 266)] * kernel_shared[((rc_outer_inner * 48) + 1070)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 783)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 879)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 975)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 315)] * kernel_shared[((rc_outer_inner * 48) + 1071)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 784)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 880)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 976)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 322)] * kernel_shared[((rc_outer_inner * 48) + 1072)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 785)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 881)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 977)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 329)] * kernel_shared[((rc_outer_inner * 48) + 1073)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 786)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 882)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 978)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 378)] * kernel_shared[((rc_outer_inner * 48) + 1074)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 787)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 883)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 979)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 385)] * kernel_shared[((rc_outer_inner * 48) + 1075)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 788)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 884)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 980)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 392)] * kernel_shared[((rc_outer_inner * 48) + 1076)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 789)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 885)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 981)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 441)] * kernel_shared[((rc_outer_inner * 48) + 1077)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 790)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 886)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 982)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 448)] * kernel_shared[((rc_outer_inner * 48) + 1078)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 791)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 887)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 983)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 455)] * kernel_shared[((rc_outer_inner * 48) + 1079)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 792)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 888)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 984)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 504)] * kernel_shared[((rc_outer_inner * 48) + 1080)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 793)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 889)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 985)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 511)] * kernel_shared[((rc_outer_inner * 48) + 1081)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 794)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 890)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 986)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 518)] * kernel_shared[((rc_outer_inner * 48) + 1082)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 795)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 891)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 987)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 567)] * kernel_shared[((rc_outer_inner * 48) + 1083)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 796)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 892)]));
- conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 988)]));
- conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 574)] * kernel_shared[((rc_outer_inner * 48) + 1084)]));
- conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 797)]));
- conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 1008) + ((int)threadIdx.x)) + 581)] * kernel_shared[((rc_outer_inner * 48) + 893)]));
... 3691 lines suppressed ...