You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ns...@apache.org on 2017/08/22 23:04:05 UTC

[incubator-mxnet] branch v0.11.0 updated: nightly build stochastically choose optimizer (#7559)

This is an automated email from the ASF dual-hosted git repository.

nswamy pushed a commit to branch v0.11.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v0.11.0 by this push:
     new 44132af  nightly build stochastically choose optimizer (#7559)
44132af is described below

commit 44132afc34134cd22def9d7be4e4b80c8724610d
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Tue Aug 22 16:04:03 2017 -0700

    nightly build stochastically choose optimizer (#7559)
    
    * Only call MKL script once
    
    * Fix 'momentum' and 'multi_precision' optimizer args
    
    * fix cmake build for active kvstore
    
    * stochastic choice of optimizer for mnist training
    
    * Run all three optimizers
    
    * Add just lenet test
---
 CMakeLists.txt                                     | 39 ++++++----
 tests/nightly/test_all.sh                          | 22 +++++-
 .../{test_all.sh => test_image_classification.sh}  | 86 +++++++++-------------
 3 files changed, 76 insertions(+), 71 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc9ca5f..5e32f6b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -354,7 +354,7 @@ if(USE_CUDA)
     FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
     list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
     FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver 
+    list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
   else(MSVC)
     list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
     link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
@@ -419,6 +419,29 @@ else()
     add_library(mxnet SHARED ${SOURCE})
   endif()
 endif()
+
+if(USE_DIST_KVSTORE)
+  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt)
+    add_subdirectory("ps-lite")
+    list(APPEND pslite_LINKER_LIBS pslite protobuf)
+    target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG})
+    target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE})
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+      list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_DEBUG})
+    else()
+      list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_RELEASE})
+    endif()
+    target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG})
+    target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE})
+
+  else()
+    set(pslite_LINKER_LIBS protobuf zmq-static)
+  endif()
+  add_definitions(-DMXNET_USE_DIST_KVSTORE)
+  include_directories(SYSTEM ${pslite_INCLUDE_DIR})
+  list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS})
+endif()
+
 target_link_libraries(mxnet ${mxnet_LINKER_LIBS})
 
 if(USE_PLUGINS_WARPCTC)
@@ -433,20 +456,6 @@ if(MSVC AND USE_MXNET_LIB_NAMING)
 endif()
 
 
-if(USE_DIST_KVSTORE)
-  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt)
-    add_subdirectory("ps-lite")
-    list(APPEND pslite_LINKER_LIBS pslite)
-    target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG})
-    target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE})
-  else()
-    set(pslite_LINKER_LIBS protobuf zmq-static )
-  endif()
-  add_definitions(-DMXNET_USE_DIST_KVSTORE)
-  target_link_libraries(mxnet ${pslite_LINKER_LIBS})
-  include_directories(SYSTEM ${pslite_INCLUDE_DIR})
-endif()
-
 if(USE_PROFILER)
 	add_definitions(-DMXNET_USE_PROFILER)
 endif()
diff --git a/tests/nightly/test_all.sh b/tests/nightly/test_all.sh
index 32913c9..04d895f 100755
--- a/tests/nightly/test_all.sh
+++ b/tests/nightly/test_all.sh
@@ -72,10 +72,24 @@ check_val() {
 example_dir=../../example/image-classification
 # python: lenet + mnist
 test_lenet() {
-    python $example_dir/train_mnist.py \
-        --data-dir `pwd`/data/mnist/ --network lenet --gpus $gpus --num-epochs 10 \
-        2>&1 | tee log
-    check_val 0.99
+    optimizers="adam sgd adagrad"
+    for optimizer in ${optimizers}; do
+        echo "OPTIMIZER: $optimizer"
+        if [ "$optimizer" == "adam" ]; then
+            learning_rate=0.0005
+            desired_accuracy=0.98
+        else
+            learning_rate=0.01
+            desired_accuracy=0.99
+        fi
+        python $example_dir/train_mnist.py --lr $learning_rate \
+            --network lenet --optimizer $optimizer --gpus $gpus \
+            --num-epochs 10 2>&1 | tee log
+       if [ $? -ne 0 ]; then
+           return $?
+       fi
+       check_val $desired_accuracy
+    done
 }
 juLog -name=Python.Lenet.Mnist -error=Fail test_lenet
 
diff --git a/tests/nightly/test_all.sh b/tests/nightly/test_image_classification.sh
similarity index 51%
copy from tests/nightly/test_all.sh
copy to tests/nightly/test_image_classification.sh
index 32913c9..93e403a 100755
--- a/tests/nightly/test_all.sh
+++ b/tests/nightly/test_image_classification.sh
@@ -19,23 +19,31 @@
 
 
 # setup
-export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=`pwd`/`dirname $0`/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+export PYTHONPATH=`pwd`/`dirname $0`/python
 cd `pwd`/`dirname $0`
 . sh2ju.sh
+
 ## clean last build log
 juLogClean
-if [ $# -eq 1 ]; then
-    num_gpus=$1
+
+if [ -f $(which nvidia-smi) ]; then
+    if [ $# -eq 1 ]; then
+        num_gpus=$1
+    else
+        num_gpus=$(nvidia-smi -L | grep "GPU" | wc -l)
+    fi
+    gpus=`seq 0 $((num_gpus-1)) | paste -sd ","`
+    device_arg="--gpus $gpus"
 else
-    num_gpus=4
+    device_arg=""
 fi
-gpus=`seq 0 $((num_gpus-1)) | paste -sd ","`
 
 # build
 build() {
-make -C ../.. clean
-make -C ../.. -j8
-return $?
+    make -C ../.. clean
+    make -C ../.. -j8
+    return $?
 }
 
 cp ../../make/config.mk ../..
@@ -48,16 +56,6 @@ EOF
 
 juLog -name=Build -error=Error build
 
-# python: local kvstore
-juLog -name=Python.Local.KVStore -error=Error python test_kvstore.py
-
-# python: distributed kvstore
-juLog -name=Python.Distributed.KVStore -error=Error ../../tools/launch.py -n 4 python dist_sync_kvstore.py
-
-# download data
-juLog -name=DownloadData bash ./download.sh
-
-
 # check if the final evaluation accuracy exceed the threshold
 check_val() {
     expected=$1
@@ -68,44 +66,28 @@ check_val() {
     rm -f log
 }
 
-
 example_dir=../../example/image-classification
 # python: lenet + mnist
 test_lenet() {
-    python $example_dir/train_mnist.py \
-        --data-dir `pwd`/data/mnist/ --network lenet --gpus $gpus --num-epochs 10 \
-        2>&1 | tee log
-    check_val 0.99
+    optimizers="adam sgd adagrad"
+    for optimizer in ${optimizers}; do
+        echo "OPTIMIZER: $optimizer"
+        if [ "$optimizer" == "adam" ]; then
+            learning_rate=0.0005
+            desired_accuracy=0.98
+        else
+            learning_rate=0.01
+            desired_accuracy=0.99
+        fi
+        python $example_dir/train_mnist.py --lr $learning_rate \
+            --network lenet --optimizer $optimizer --gpus $gpus \
+            --num-epochs 10 2>&1 | tee log
+       if [ $? -ne 0 ]; then
+           return $?
+       fi
+       check_val $desired_accuracy
+    done
 }
 juLog -name=Python.Lenet.Mnist -error=Fail test_lenet
 
-# python: distributed lenet + mnist
-test_dist_lenet() {
-    ../../tools/launch.py -n ${num_gpus} \
-        python ./dist_lenet.py --data-dir `pwd`/data/mnist/ \
-        --kv-store dist_sync \
-        --num-epochs 10 \
-        2>&1 | tee log
-    check_val 0.98
-}
-juLog -name=Python.Distributed.Lenet.Mnist -error=Fail test_dist_lenet
-
-# python: inception + cifar10
-test_inception_cifar10() {
-    python $example_dir/train_cifar10.py \
-        --data-dir `pwd`/data/cifar10/ --gpus $gpus --num-epochs 20 --batch-size 256 \
-        2>&1 | tee log
-    check_val 0.82
-}
-juLog -name=Python.Inception.Cifar10 -error=Fail test_inception_cifar10
-
-# build without CUDNN
-cat >>../../config.mk <<EOF
-USE_CUDNN=0
-EOF
-juLog -name=BuildWithoutCUDNN -error=Error build
-
-# python: multi gpus lenet + mnist
-juLog -name=Python.Multi.Lenet.Mnist -error=Error python multi_lenet.py
-
 exit $errors

-- 
To stop receiving notification emails like this one, please contact
commits@mxnet.apache.org.