Posted to commits@mxnet.apache.org by ns...@apache.org on 2017/08/22 23:04:05 UTC
[incubator-mxnet] branch v0.11.0 updated: nightly build stochastically choose optimizer (#7559)
This is an automated email from the ASF dual-hosted git repository.
nswamy pushed a commit to branch v0.11.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v0.11.0 by this push:
new 44132af nightly build stochastically choose optimizer (#7559)
44132af is described below
commit 44132afc34134cd22def9d7be4e4b80c8724610d
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Tue Aug 22 16:04:03 2017 -0700
nightly build stochastically choose optimizer (#7559)
* Only call MKL script once
* Fix 'momentum' and 'multi_precision' optimizer args
* fix cmake build for active kvstore
* stochastic choice of optimizer for mnist training
* Run all three optimizers
* Add just lenet test
---
CMakeLists.txt | 39 ++++++----
tests/nightly/test_all.sh | 22 +++++-
.../{test_all.sh => test_image_classification.sh} | 86 +++++++++-------------
3 files changed, 76 insertions(+), 71 deletions(-)
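Note: the title mentions choosing the optimizer stochastically, but as the last bullets above say, the patch below ends up running all three optimizers (adam, sgd, adagrad) in sequence for the lenet test. For contrast, a purely stochastic pick per nightly run could look like the hypothetical bash snippet below; this is a sketch, not part of the commit:

    # Hypothetical: pick one optimizer at random per run instead of looping
    # over all three (requires bash for arrays and $RANDOM).
    optimizers=(adam sgd adagrad)
    optimizer=${optimizers[$((RANDOM % ${#optimizers[@]}))]}
    echo "OPTIMIZER: $optimizer"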
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc9ca5f..5e32f6b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -354,7 +354,7 @@ if(USE_CUDA)
FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
- list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
+ list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
else(MSVC)
list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
@@ -419,6 +419,29 @@ else()
add_library(mxnet SHARED ${SOURCE})
endif()
endif()
+
+if(USE_DIST_KVSTORE)
+ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt)
+ add_subdirectory("ps-lite")
+ list(APPEND pslite_LINKER_LIBS pslite protobuf)
+ target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG})
+ target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE})
+ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+ list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_DEBUG})
+ else()
+ list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_RELEASE})
+ endif()
+ target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG})
+ target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE})
+
+ else()
+ set(pslite_LINKER_LIBS protobuf zmq-static)
+ endif()
+ add_definitions(-DMXNET_USE_DIST_KVSTORE)
+ include_directories(SYSTEM ${pslite_INCLUDE_DIR})
+ list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS})
+endif()
+
target_link_libraries(mxnet ${mxnet_LINKER_LIBS})
if(USE_PLUGINS_WARPCTC)
@@ -433,20 +456,6 @@ if(MSVC AND USE_MXNET_LIB_NAMING)
endif()
-if(USE_DIST_KVSTORE)
- if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt)
- add_subdirectory("ps-lite")
- list(APPEND pslite_LINKER_LIBS pslite)
- target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG})
- target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE})
- else()
- set(pslite_LINKER_LIBS protobuf zmq-static )
- endif()
- add_definitions(-DMXNET_USE_DIST_KVSTORE)
- target_link_libraries(mxnet ${pslite_LINKER_LIBS})
- include_directories(SYSTEM ${pslite_INCLUDE_DIR})
-endif()
-
if(USE_PROFILER)
add_definitions(-DMXNET_USE_PROFILER)
endif()
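The net effect of the two CMakeLists.txt hunks above is that the USE_DIST_KVSTORE block now runs before the final target_link_libraries(mxnet ${mxnet_LINKER_LIBS}) call, so the ps-lite/protobuf dependencies are appended to mxnet_LINKER_LIBS in time to be linked. A minimal way to exercise this path is an out-of-source build with the flag enabled; the directory layout and invocation below are assumptions, not part of the commit:

    # Sketch: configure and build with the distributed kvstore enabled.
    mkdir -p build && cd build
    cmake -DUSE_DIST_KVSTORE=ON ..
    make -j8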
diff --git a/tests/nightly/test_all.sh b/tests/nightly/test_all.sh
index 32913c9..04d895f 100755
--- a/tests/nightly/test_all.sh
+++ b/tests/nightly/test_all.sh
@@ -72,10 +72,24 @@ check_val() {
example_dir=../../example/image-classification
# python: lenet + mnist
test_lenet() {
- python $example_dir/train_mnist.py \
- --data-dir `pwd`/data/mnist/ --network lenet --gpus $gpus --num-epochs 10 \
- 2>&1 | tee log
- check_val 0.99
+ optimizers="adam sgd adagrad"
+ for optimizer in ${optimizers}; do
+ echo "OPTIMIZER: $optimizer"
+ if [ "$optimizer" == "adam" ]; then
+ learning_rate=0.0005
+ desired_accuracy=0.98
+ else
+ learning_rate=0.01
+ desired_accuracy=0.99
+ fi
+ python $example_dir/train_mnist.py --lr $learning_rate \
+ --network lenet --optimizer $optimizer --gpus $gpus \
+ --num-epochs 10 2>&1 | tee log
+ if [ $? -ne 0 ]; then
+ return $?
+ fi
+ check_val $desired_accuracy
+ done
}
juLog -name=Python.Lenet.Mnist -error=Fail test_lenet
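One caveat worth flagging in the loop added above (not changed by this commit): because the training command is piped through tee, $? reflects tee rather than python, and the return $? inside the if branch reports the status of the [ test, which is already 0 at that point. A hedged sketch that actually propagates a training failure, assuming bash, would be:

    # Sketch only: capture the training exit status before testing it.
    python $example_dir/train_mnist.py --lr $learning_rate \
        --network lenet --optimizer $optimizer --gpus $gpus \
        --num-epochs 10 2>&1 | tee log
    status=${PIPESTATUS[0]}   # exit code of python, not tee
    if [ $status -ne 0 ]; then
        return $status
    fi
    check_val $desired_accuracy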
diff --git a/tests/nightly/test_all.sh b/tests/nightly/test_image_classification.sh
similarity index 51%
copy from tests/nightly/test_all.sh
copy to tests/nightly/test_image_classification.sh
index 32913c9..93e403a 100755
--- a/tests/nightly/test_all.sh
+++ b/tests/nightly/test_image_classification.sh
@@ -19,23 +19,31 @@
# setup
-export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=`pwd`/`dirname $0`/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+export PYTHONPATH=`pwd`/`dirname $0`/python
cd `pwd`/`dirname $0`
. sh2ju.sh
+
## clean last build log
juLogClean
-if [ $# -eq 1 ]; then
- num_gpus=$1
+
+if [ -f $(which nvidia-smi) ]; then
+ if [ $# -eq 1 ]; then
+ num_gpus=$1
+ else
+ num_gpus=$(nvidia-smi -L | grep "GPU" | wc -l)
+ fi
+ gpus=`seq 0 $((num_gpus-1)) | paste -sd ","`
+ device_arg="--gpus $gpus"
else
- num_gpus=4
+ device_arg=""
fi
-gpus=`seq 0 $((num_gpus-1)) | paste -sd ","`
# build
build() {
-make -C ../.. clean
-make -C ../.. -j8
-return $?
+ make -C ../.. clean
+ make -C ../.. -j8
+ return $?
}
cp ../../make/config.mk ../..
@@ -48,16 +56,6 @@ EOF
juLog -name=Build -error=Error build
-# python: local kvstore
-juLog -name=Python.Local.KVStore -error=Error python test_kvstore.py
-
-# python: distributed kvstore
-juLog -name=Python.Distributed.KVStore -error=Error ../../tools/launch.py -n 4 python dist_sync_kvstore.py
-
-# download data
-juLog -name=DownloadData bash ./download.sh
-
-
# check if the final evaluation accuracy exceed the threshold
check_val() {
expected=$1
@@ -68,44 +66,28 @@ check_val() {
rm -f log
}
-
example_dir=../../example/image-classification
# python: lenet + mnist
test_lenet() {
- python $example_dir/train_mnist.py \
- --data-dir `pwd`/data/mnist/ --network lenet --gpus $gpus --num-epochs 10 \
- 2>&1 | tee log
- check_val 0.99
+ optimizers="adam sgd adagrad"
+ for optimizer in ${optimizers}; do
+ echo "OPTIMIZER: $optimizer"
+ if [ "$optimizer" == "adam" ]; then
+ learning_rate=0.0005
+ desired_accuracy=0.98
+ else
+ learning_rate=0.01
+ desired_accuracy=0.99
+ fi
+ python $example_dir/train_mnist.py --lr $learning_rate \
+ --network lenet --optimizer $optimizer --gpus $gpus \
+ --num-epochs 10 2>&1 | tee log
+ if [ $? -ne 0 ]; then
+ return $?
+ fi
+ check_val $desired_accuracy
+ done
}
juLog -name=Python.Lenet.Mnist -error=Fail test_lenet
-# python: distributed lenet + mnist
-test_dist_lenet() {
- ../../tools/launch.py -n ${num_gpus} \
- python ./dist_lenet.py --data-dir `pwd`/data/mnist/ \
- --kv-store dist_sync \
- --num-epochs 10 \
- 2>&1 | tee log
- check_val 0.98
-}
-juLog -name=Python.Distributed.Lenet.Mnist -error=Fail test_dist_lenet
-
-# python: inception + cifar10
-test_inception_cifar10() {
- python $example_dir/train_cifar10.py \
- --data-dir `pwd`/data/cifar10/ --gpus $gpus --num-epochs 20 --batch-size 256 \
- 2>&1 | tee log
- check_val 0.82
-}
-juLog -name=Python.Inception.Cifar10 -error=Fail test_inception_cifar10
-
-# build without CUDNN
-cat >>../../config.mk <<EOF
-USE_CUDNN=0
-EOF
-juLog -name=BuildWithoutCUDNN -error=Error build
-
-# python: multi gpus lenet + mnist
-juLog -name=Python.Multi.Lenet.Mnist -error=Error python multi_lenet.py
-
exit $errors
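The GPU-detection hunk near the top of this new script sets device_arg so the same commands can run on CPU-only hosts, but the copied test_lenet still passes --gpus $gpus directly, which is empty when nvidia-smi is absent. A sketch of a call site that consumes device_arg instead (an adjustment, not part of the commit) would be:

    # Sketch: let device_arg supply --gpus only when GPUs were detected.
    python $example_dir/train_mnist.py --lr $learning_rate \
        --network lenet --optimizer $optimizer $device_arg \
        --num-epochs 10 2>&1 | tee log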
--
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].