You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2023/02/15 09:13:45 UTC
[singa] branch dev updated: add cifar 10 multi process for large dataset cnn
This is an automated email from the ASF dual-hosted git repository.
zhaojing pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/singa.git
The following commit(s) were added to refs/heads/dev by this push:
new 62551d32 add cifar 10 multi process for large dataset cnn
new ff8f92a5 Merge pull request #1030 from wannature/singa_v31
62551d32 is described below
commit 62551d32701c9cad932a96be3c686f7db9c9f4dc
Author: wenqiao zhang <we...@zju.edu.cn>
AuthorDate: Tue Feb 14 20:33:55 2023 +0800
add cifar 10 multi process for large dataset cnn
---
.../autograd/cifar10_multiprocess.py | 43 ++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/examples/largedataset_cnn/autograd/cifar10_multiprocess.py b/examples/largedataset_cnn/autograd/cifar10_multiprocess.py
new file mode 100644
index 00000000..df2dba8b
--- /dev/null
+++ b/examples/largedataset_cnn/autograd/cifar10_multiprocess.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from resnet_cifar10 import *
+import multiprocessing
+import sys
+
+if __name__ == '__main__':
+ # Launcher script: builds one train_cifar10 worker process per rank, then starts them all.
+ # Create a single NCCL ID holder that is passed to every worker (`singa` is presumably provided by the wildcard import above -- verify)
+ nccl_id = singa.NcclIdHolder()
+
+ # Number of worker processes, read from the first command-line argument (presumably one per GPU -- confirm)
+ world_size = int(sys.argv[1])
+
+ # Flag forwarded to train_cifar10: exercise the experimental partial-parameter update asynchronous training
+ partial_update = True
+ # Build every Process first; none runs until the start loop below. NOTE(review): meaning of the leading True argument is defined by train_cifar10 -- verify
+ process = []
+ for local_rank in range(0, world_size):
+ process.append(
+ multiprocessing.Process(target=train_cifar10,
+ args=(True, local_rank, world_size, nccl_id,
+ partial_update)))
+ # Start all workers. NOTE(review): there is no explicit join() here -- confirm that this is intended
+ for p in process:
+ p.start()