You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/10 06:01:58 UTC
[3/3] incubator-singa git commit: Merge PR #232 for training AlexNet
over ImageNet
Merge PR #232 for training AlexNet over ImageNet
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/53639b7c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/53639b7c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/53639b7c
Branch: refs/heads/dev
Commit: 53639b7ce8ddbde2e47473701ab414548575849b
Parents: 17bfb19 8051720
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Aug 10 14:00:24 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 10 14:00:24 2016 +0800
----------------------------------------------------------------------
CMakeLists.txt | 8 +-
examples/CMakeLists.txt | 1 +
examples/imagenet/CMakeLists.txt | 16 ++
examples/imagenet/README.md | 58 +++++
examples/imagenet/alexnet.cc | 410 ++++++++++++++++++++++++++++++++++
examples/imagenet/create_data.sh | 3 +
examples/imagenet/ilsvrc12.cc | 70 ++++++
examples/imagenet/ilsvrc12.h | 380 +++++++++++++++++++++++++++++++
examples/imagenet/run.sh | 3 +
include/singa/io/snapshot.h | 8 +-
include/singa/utils/timer.h | 6 +-
src/core/tensor/tensor.cc | 6 +-
src/io/binfile_reader.cc | 6 +-
src/io/jpg_encoder.cc | 2 +-
src/io/snapshot.cc | 8 +-
15 files changed, 967 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53639b7c/CMakeLists.txt
----------------------------------------------------------------------
diff --cc CMakeLists.txt
index c1d0521,8c6afad..9efadc0
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@@ -18,14 -18,13 +18,14 @@@ SET(SINGA_INCLUDE_DI
"${CMAKE_SOURCE_DIR}/include;${CMAKE_SOURCE_DIR}/lib/cnmem/include;${PROJECT_BINARY_DIR}")
INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
- OPTION(USE_CBLAS "Use CBlas libs" OFF)
- OPTION(USE_CUDA "Use Cuda libs" OFF)
- OPTION(USE_CUDNN "Use Cudnn libs" OFF)
+ OPTION(USE_CBLAS "Use CBlas libs" ON)
+ OPTION(USE_CUDA "Use Cuda libs" ON)
+ OPTION(USE_CUDNN "Use Cudnn libs" ON)
OPTION(USE_OPENCV "Use opencv" OFF)
OPTION(USE_LMDB "Use LMDB libs" OFF)
- OPTION(USE_PYTHON "Generate py wrappers" OFF)
+ OPTION(USE_PYTHON "Generate py wrappers" ON)
OPTION(USE_OPENCL "Use OpenCL" OFF)
+OPTION(ENABLE_DIST "enable distributed training" OFF)
#OPTION(BUILD_OPENCL_TESTS "Build OpenCL tests" OFF)
INCLUDE("cmake/Dependencies.cmake")
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53639b7c/examples/imagenet/README.md
----------------------------------------------------------------------
diff --cc examples/imagenet/README.md
index 0000000,2e0389a..be6797c
mode 000000,100644..100644
--- a/examples/imagenet/README.md
+++ b/examples/imagenet/README.md
@@@ -1,0 -1,43 +1,58 @@@
-# Example of alexnet
++# Train AlexNet over ImageNet
++
++Convolutional neural network (CNN) is a type of feed-forward neural
++network widely used for image and video classification. In this example, we will
++use a [deep CNN model](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)
++to do image classification against the ImageNet dataset.
++
++## Instructions
++
++### Compile SINGA
++
++Please compile SINGA with CUDA, CUDNN and OpenCV. You can manually turn on the
++options in CMakeLists.txt or run `ccmake ..` in build/ folder.
++
++We have tested CUDNN V4 and V5 (V5 requires CUDA 7.5)
+
+ ### Data download
+ * Please refer to step1-3 on [Instructions to create ImageNet 2012 data](https://github.com/amd/OpenCL-caffe/wiki/Instructions-to-create-ImageNet-2012-data)
+ to download and decompress the data.
-* You can download the training and validation list by
- [get_ilsvrc_aux.sh](https://github.com/BVLC/caffe/blob/master/data/ilsvrc12/get_ilsvrc_aux.sh)
++* You can download the training and validation list by
++ [get_ilsvrc_aux.sh](https://github.com/BVLC/caffe/blob/master/data/ilsvrc12/get_ilsvrc_aux.sh)
+ or from [Imagenet](http://www.image-net.org/download-images).
+
+ ### Data preprocessing
-* Assuming you have downloaded the data and the list.
++* Assuming you have downloaded the data and the list.
+ Now we should transform the data into binary files. You can run:
-
++
+ sh create_data.sh
-
- The script will generate a test file(`test.bin`), a mean file(`mean.bin`) and
++
++ The script will generate a test file(`test.bin`), a mean file(`mean.bin`) and
+ several training files(`trainX.bin`) in the specified output folder.
+ * You can also change the parameters in `create_data.sh`.
+ + `-trainlist <file>`: the file of training list;
+ + `-trainfolder <folder>`: the folder of training images;
+ + `-testlist <file>`: the file of test list;
+ + `-testfolder <folder>`: the folder of test images;
- + `-outdata <folder>`: the folder to save output files, including mean, training and test files.
++ + `-outdata <folder>`: the folder to save output files, including mean, training and test files.
+ The script will generate these files in the specified folder;
+ + `-filesize <int>`: number of training images stored in each binary file.
+
+ ### Training
+ * After preparing data, you can run the following command to train the Alexnet model.
+
+ sh run.sh
++
+ * You may change the parameters in `run.sh`.
+ + `-epoch <int>`: number of epoch to be trained, default is 90;
- + `-lr <float>`: base learning rate, the learning rate will decrease each 20 epochs,
++ + `-lr <float>`: base learning rate, the learning rate will decrease each 20 epochs,
+ more specifically, `lr = lr * exp(0.1 * (epoch / 20))`;
+ + `-batchsize <int>`: batchsize, it should be changed according to your memory;
+ + `-filesize <int>`: number of training images stored in each binary file; it is the
+ same as the `filesize` in data preprocessing;
+ + `-ntrain <int>`: number of training images;
+ + `-ntest <int>`: number of test images;
+ + `-data <folder>`: the folder which stores the binary files, it is exactly the output
+ folder in data preprocessing step;
+ + `-pfreq <int>`: the frequency(in batch) of printing current model status(loss and accuracy);
- + `-nthreads <int>`: the number of threads to load data which feed to the model.
++ + `-nthreads <int>`: the number of threads used to load the data fed to the model.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53639b7c/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --cc src/core/tensor/tensor.cc
index d2fec53,2951aa9..e260f9e
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@@ -34,17 -34,13 +34,15 @@@ Tensor::Tensor() { device_ = defaultDev
Tensor::Tensor(const Shape &shape, DataType dtype)
: data_type_(dtype), device_(defaultDevice), shape_(shape) {
- device_ = defaultDevice;
- //device_ = defaultDevice;
- block_ = device_->NewBlock(Product(shape_) * SizeOf(data_type_));
+ size_t size = Product(shape_) * SizeOf(data_type_);
+ if (size)
+ block_ = device_->NewBlock(size);
}
Tensor::Tensor(Shape &&shape, DataType dtype)
: data_type_(dtype), device_(defaultDevice), shape_(shape) {
- device_ = defaultDevice;
- //device_ = defaultDevice;
- block_ = device_->NewBlock(Product(shape_) * SizeOf(data_type_));
+ size_t size = Product(shape_) * SizeOf(data_type_);
+ if (size)
+ block_ = device_->NewBlock(size);
}
Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device,
DataType dtype)