You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/05 17:59:16 UTC

[3/3] incubator-singa git commit: SINGA-98 Add Support for AlexNet ImageNet Classification Model

SINGA-98 Add Support for AlexNet ImageNet Classification Model

Update the CudnnActivationLayer to share the data and grad blob with conv layer for memory space reduction.
It is controlled by the share_src_blobs field in the job config file.
The loss decreases after 3000 iterations with a mini-batch size of 256, similar to Caffe.

cpplint check; update the job conf for CPU training;


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bb75a0be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bb75a0be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bb75a0be

Branch: refs/heads/master
Commit: bb75a0be5f1bf00d24552fb943b5fc40453b5855
Parents: 6e815db
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Dec 29 19:10:00 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 00:58:18 2016 +0800

----------------------------------------------------------------------
 examples/alexnet/cudnn.conf       | 18 ++++++++++--
 examples/alexnet/im2rec.cc        | 35 +++++++++++++++++++-----
 examples/alexnet/job.conf         | 50 ++++++++++++++++++----------------
 examples/alexnet/rec2im_test.cc   | 39 ++++++++++++++++++++------
 src/neuralnet/layer.cc            |  2 +-
 src/neuralnet/neuron_layer/lrn.cc |  5 ++--
 src/proto/job.proto               |  3 ++
 src/utils/updater.cc              |  2 +-
 8 files changed, 108 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/cudnn.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/cudnn.conf b/examples/alexnet/cudnn.conf
index eef20f9..6324185 100644
--- a/examples/alexnet/cudnn.conf
+++ b/examples/alexnet/cudnn.conf
@@ -1,7 +1,7 @@
 name: "alexnet"
 train_steps: 450000
-#test_steps: 500
-#test_freq: 1000
+test_steps: 500
+test_freq: 1000
 disp_freq: 20
 checkpoint_freq: 100000
 checkpoint_after: 100000
@@ -423,6 +423,20 @@ neuralnet {
     }
     srclayers: "ip8"
     srclayers: "data"
+    include: kTrain
+  }
+  layer {
+   name : "softmax"
+   type: kCudnnSoftmax
+   srclayers: "ip8"
+   include: kTest
+  }
+  layer {
+   name : "accuracy"
+   type: kAccuracy
+   srclayers: "softmax"
+   srclayers: "data"
+   include: kTest
   }
 }
 cluster {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/im2rec.cc
----------------------------------------------------------------------
diff --git a/examples/alexnet/im2rec.cc b/examples/alexnet/im2rec.cc
index cf6eedf..58ee44f 100644
--- a/examples/alexnet/im2rec.cc
+++ b/examples/alexnet/im2rec.cc
@@ -1,4 +1,27 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+
 #include <glog/logging.h>
+#include <opencv2/opencv.hpp>
 #include <algorithm>
 #include <random>
 #include <chrono>
@@ -7,7 +30,6 @@
 #include <cstdint>
 #include <iostream>
 #include <vector>
-#include <opencv2/opencv.hpp>
 
 #include "singa/io/store.h"
 #include "singa/proto/common.pb.h"
@@ -20,8 +42,7 @@ const int kImageNBytes = 256*256*3;
 void create_data(const string& image_list,
     const string& input_folder,
     const string& output_folder,
-    const string& backend = "kvfile")
-{
+    const string& backend = "kvfile") {
   singa::RecordProto image;
   image.add_shape(3);
   image.add_shape(kImageSize);
@@ -49,12 +70,12 @@ void create_data(const string& image_list,
   string rec_buf;
   cv::Mat img, res;
   std::vector<std::pair<string, int>> file_list;
-  while(image_list_file >> image_file_name >> label)
+  while (image_list_file >> image_file_name >> label)
     file_list.push_back(std::make_pair(image_file_name, label));
   LOG(INFO) << "Data Shuffling";
   unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
   std::shuffle(file_list.begin(), file_list.end()
-      ,std::default_random_engine());
+      , std::default_random_engine());
   LOG(INFO) << "Total number of images is " << file_list.size();
   int ImageNum = file_list.size();
 
@@ -120,8 +141,8 @@ void create_data(const string& image_list,
 
 int main(int argc, char** argv) {
   if (argc < 4) {
-    std::cout << "Create Datashard for ImageNet dataset.\n"
-      << "Usage: <image_list> <input_folder> <output_folder>"
+    std::cout << "Create data stores for ImageNet dataset.\n"
+      << "Usage: <image_list_file> <input_image_folder> <output_folder>"
       << " <Optional: backend {lmdb, kvfile} default: kvfile>\n";
   } else {
     google::InitGoogleLogging(argv[0]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/job.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/job.conf b/examples/alexnet/job.conf
index 1898a09..3b7eaf4 100644
--- a/examples/alexnet/job.conf
+++ b/examples/alexnet/job.conf
@@ -1,10 +1,10 @@
 name: "alexnet"
-train_steps: 100
-test_steps: 0
-test_freq: 300
-disp_freq: 5
-#debug: true
-#checkpoint_path: "examples/alexnet/checkpoint/step10000-worker0"
+train_steps: 450000
+test_steps: 500
+test_freq: 1000
+disp_freq: 20
+checkpoint_freq: 100000
+checkpoint_after: 100000
 train_one_batch {
   alg: kBP
 }
@@ -13,8 +13,12 @@ updater{
   weight_decay: 0.0005
   momentum: 0.9
   learning_rate {
-    type: kFixed
+    type: kStep
     base_lr: 0.01
+    step_conf {
+      gamma: 0.1
+      change_freq: 100000
+    }
   }
 }
 neuralnet {
@@ -25,22 +29,22 @@ neuralnet {
       backend: "kvfile"
       path :"/data/dataset/imagenet/train_record.bin"
       mean_file: "/data/dataset/imagenet/image_mean.bin"
-      batchsize: 32
-      #random_skip: 5000
+      batchsize: 256
+      #random_skip: 1000
       shape: 3
       shape: 256
       shape: 256
     }
-      include: kTrain
+    include: kTrain
   }
   layer{
     name: "data"
     type: kRecordInput
     store_conf {
       backend: "kvfile"
-      path :"/data/dataset/val_record.bin"
-      mean_file: "/data/dataset/image_mean.bin"
-      batchsize: 256
+      path :"/data/dataset/imagenet/val_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 100
       shape: 3
       shape: 256
       shape: 256
@@ -59,7 +63,7 @@ neuralnet {
   }
   layer{
     name: "conv1"
-    type: kCConvolution
+    type: kConvolution
     srclayers: "image"
     convolution_conf {
       num_filters: 96
@@ -92,7 +96,7 @@ neuralnet {
   }
   layer {
     name: "pool1"
-    type: kCPooling
+    type: kPooling
     pooling_conf {
       pool: MAX
       kernel: 3
@@ -108,7 +112,7 @@ neuralnet {
       local_size: 5
       alpha: 0.0001
       beta: 0.75
-      knorm: 2
+      knorm: 1
     }
     srclayers: "pool1"
 #    partition_dim: 0
@@ -116,7 +120,7 @@ neuralnet {
 
   layer{
     name: "conv2"
-    type: kCConvolution
+    type: kConvolution
     srclayers: "norm1"
     convolution_conf {
       num_filters: 256
@@ -149,7 +153,7 @@ neuralnet {
   }
   layer {
     name: "pool2"
-    type: kCPooling
+    type: kPooling
     pooling_conf {
       pool: MAX
       kernel: 3
@@ -166,14 +170,14 @@ neuralnet {
       local_size: 5
       alpha: 0.0001
       beta: 0.75
-      knorm: 2
+      knorm: 1
     }
     srclayers: "pool2"
 #    partition_dim: 0
   }
   layer{
     name: "conv3"
-    type: kCConvolution
+    type: kConvolution
     srclayers: "norm2"
     convolution_conf {
       num_filters: 384
@@ -206,7 +210,7 @@ neuralnet {
   }
   layer{
     name: "conv4"
-    type: kCConvolution
+    type: kConvolution
     srclayers: "relu3"
     convolution_conf {
       num_filters: 384
@@ -239,7 +243,7 @@ neuralnet {
   }
   layer{
     name: "conv5"
-    type: kCConvolution
+    type: kConvolution
     srclayers: "relu4"
     convolution_conf {
       num_filters: 256
@@ -272,7 +276,7 @@ neuralnet {
   }
   layer {
     name: "pool5"
-    type: kCPooling
+    type: kPooling
     pooling_conf {
       pool: MAX
       kernel: 3

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/rec2im_test.cc
----------------------------------------------------------------------
diff --git a/examples/alexnet/rec2im_test.cc b/examples/alexnet/rec2im_test.cc
index 0fa3505..bb92d95 100644
--- a/examples/alexnet/rec2im_test.cc
+++ b/examples/alexnet/rec2im_test.cc
@@ -1,4 +1,28 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+
 #include <glog/logging.h>
+#include <opencv2/opencv.hpp>
+
 #include <algorithm>
 #include <random>
 #include <chrono>
@@ -7,7 +31,6 @@
 #include <cstdint>
 #include <iostream>
 #include <vector>
-#include <opencv2/opencv.hpp>
 
 #include "singa/io/store.h"
 #include "singa/proto/common.pb.h"
@@ -19,8 +42,7 @@ const int kImageNBytes = 256*256*3;
 
 void generate_image(const string& output_folder,
     const string& key,
-    const string& val)
-{
+    const string& val) {
   float image_buf[kImageNBytes];
   singa::RecordProto image;
   image.ParseFromString(val);
@@ -38,6 +60,7 @@ void generate_image(const string& output_folder,
           static_cast<uchar>(
               static_cast<uint8_t>(
                 pixel[(c * kImageSize + h) * kImageSize + w]));
+    }
   }
 
   cv::imwrite(image_name, img);
@@ -45,8 +68,7 @@ void generate_image(const string& output_folder,
 
 void visualize(const string& input_file,
     const string& output_folder,
-    const string& id_list)
-{
+    const string& id_list) {
   auto store = singa::io::OpenStore("kvfile", input_file,
       singa::io::kRead);
 
@@ -55,7 +77,7 @@ void visualize(const string& input_file,
   std::ifstream id_list_file(id_list.c_str(), std::ios::in);
   CHECK(id_list_file.is_open()) << "Unable to open image id list";
   string id_;
-  while(id_list_file >> id_) {
+  while (id_list_file >> id_) {
     int x;
     x = std::stoi(id_);
     image_id_list.push_back(x);
@@ -80,10 +102,9 @@ void visualize(const string& input_file,
   }
 }
 
-int main(int argc, char** argv)
-{
+int main(int argc, char** argv) {
   if (argc != 4) {
-    std::cout << "Visualize images from binary kvfile record.\n"
+    std::cout << "Visualize images from binary kvfile records.\n"
       << "Usage: <input_file> <output_folder> <id_list>\n";
   } else {
     google::InitGoogleLogging(argv[0]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index ac673dd..cb1f3b8 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -56,7 +56,7 @@ const std::string Layer::ToString(bool debug, int flag) {
   if ((flag & kBackward) == kBackward && grad_.count() != 0) {
     ret += StringPrintf("grad:%e ", Asum(grad_));
     for (Param* p : GetParams())
-      ret += StringPrintf("%13.9f ",
+      ret += StringPrintf("%s:%13.9f ",
           p->name().c_str(), Asum(p->grad()));
   }
   return ret;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/neuralnet/neuron_layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/lrn.cc b/src/neuralnet/neuron_layer/lrn.cc
index ce96d11..b199b9a 100644
--- a/src/neuralnet/neuron_layer/lrn.cc
+++ b/src/neuralnet/neuron_layer/lrn.cc
@@ -64,12 +64,11 @@ void LRNLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
   auto grad = Tensor4(&grad_);
   auto gsrc = Tensor4(srclayers[0]->mutable_grad(this));
 
-  gsrc = grad * expr::F<op::power>(norm, -beta_ );
+  gsrc = grad * expr::F<op::power>(norm, -beta_);
   Tensor<cpu, 4> tmp(gsrc.shape);
   AllocSpace(tmp);
   tmp = gsrc * src / norm;
-  gsrc += ( - 2.0f * beta_ * salpha ) * expr::chpool<red::sum>(tmp, lsize_ )
-    * src;
+  gsrc += (- 2.0f * beta_ * salpha) * expr::chpool<red::sum>(tmp, lsize_) * src;
   FreeSpace(tmp);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 03ee327..db55987 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -192,6 +192,9 @@ message LayerProto {
   optional LayerType type = 20 [default = kUserLayer];
   // type of user layer
   optional string user_type = 21;
+  // share data and grad blob with the single src layer, e.g., relu layer can
+  // share blobs from conv layer. It is useful for saving memory space.
+  optional bool share_src_blobs = 22 [default = false];
   // overrides the partition dimension for neural net
   optional int32 partition_dim = 60 [default = -1];
   // names of parameters shared from other layers

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index bb055c2..21608fa 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -60,7 +60,7 @@ float StepLRGen::Get(int step) {
   // do not cast int to float
   int freq = proto_.step_conf().change_freq();
   float lr = proto_.base_lr() * pow(proto_.step_conf().gamma(), step / freq);
-  LOG_IF(ERROR, step % freq == 0) << "Update learning rate to " << lr
+  LOG_IF(INFO, step % freq == 0) << "Update learning rate to " << lr
     << " @ step " << step;
   return lr;
 }