Posted to commits@singa.apache.org by wa...@apache.org on 2015/09/14 12:01:53 UTC

[1/3] incubator-singa git commit: SINGA-51 Improve the convolution and pooling operations

Repository: incubator-singa
Updated Branches:
  refs/heads/master 077d3804f -> d5d817e14


SINGA-51 Improve the convolution and pooling operations

Caffe's im2col is adopted to speed up the convolution operation.
The max pooling operation is accelerated by book-keeping the positions of the
max neurons, as in Caffe.
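
The idea: im2col unrolls every kernel-sized patch of the input into a column of
a matrix, so the whole convolution collapses into one matrix multiplication
(GEMM), which is far more cache- and BLAS-friendly than nested per-pixel loops.
For max pooling, the forward pass records in a mask blob the index of the
winning neuron in each window, so the backward pass only has to route each
gradient back to the recorded position. Below is a minimal stand-alone sketch
of the im2col idea, assuming stride 1 and no padding; the names are
illustrative, not SINGA's own API.

// im2col_sketch.cc -- hedged illustration only; the real routines are the
// Im2col/Col2im functions added to src/utils/common.cc in this commit.
#include <cstdio>
#include <vector>

// Unroll a C x H x W image into a (C*K*K) x (Ho*Wo) column matrix.
void Im2colSketch(const std::vector<float>& im, int C, int H, int W, int K,
                  std::vector<float>* col) {
  int Ho = H - K + 1, Wo = W - K + 1;
  col->assign(C * K * K * Ho * Wo, 0.f);
  for (int c = 0; c < C * K * K; ++c) {
    int w_off = c % K, h_off = (c / K) % K, c_im = c / (K * K);
    for (int h = 0; h < Ho; ++h)
      for (int w = 0; w < Wo; ++w)
        (*col)[(c * Ho + h) * Wo + w] =
            im[(c_im * H + h + h_off) * W + w + w_off];
  }
}

int main() {
  int C = 1, H = 4, W = 4, K = 3, F = 2;   // 1 input channel, 2 filters
  int Ho = H - K + 1, Wo = W - K + 1;
  std::vector<float> im(C * H * W, 1.f), col;
  std::vector<float> weight(F * C * K * K, 0.5f);   // F x (C*K*K)
  Im2colSketch(im, C, H, W, K, &col);
  // Convolution is now a single GEMM: out[F x (Ho*Wo)] = weight * col.
  std::vector<float> out(F * Ho * Wo, 0.f);
  for (int f = 0; f < F; ++f)
    for (int p = 0; p < Ho * Wo; ++p)
      for (int k = 0; k < C * K * K; ++k)
        out[f * Ho * Wo + p] +=
            weight[f * C * K * K + k] * col[k * Ho * Wo + p];
  std::printf("out[0] = %.1f (expect 4.5 for an all-ones input)\n", out[0]);
  return 0;
}

In CConvolutionLayer below, mshadow's dot() plays the role of the naive GEMM
loop, and Col2im scatters the column gradients back to the input layout.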


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d59eecf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d59eecf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d59eecf

Branch: refs/heads/master
Commit: 6d59eecf0502a0f3575770cb166be94775cf83f2
Parents: 50deedd
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sun Sep 13 20:00:02 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sun Sep 13 20:00:02 2015 +0800

----------------------------------------------------------------------
 examples/cifar10/job.conf        |  20 +-
 examples/mnist/conv.conf         |   8 +-
 include/neuralnet/neuron_layer.h |  21 +++
 include/utils/common.h           |  42 ++++-
 src/driver.cc                    |   2 +
 src/neuralnet/neuron_layer.cc    |  82 +++++++-
 src/proto/job.proto              |   5 +-
 src/trainer/trainer.cc           |   1 -
 src/utils/blob.cc                |   2 +-
 src/utils/common.cc              | 344 +++++++++++++++++++++++++++++-----
 10 files changed, 451 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/examples/cifar10/job.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/job.conf b/examples/cifar10/job.conf
index 0fdd244..b36c45a 100644
--- a/examples/cifar10/job.conf
+++ b/examples/cifar10/job.conf
@@ -27,7 +27,7 @@ neuralnet {
     type: kShardData
     sharddata_conf {
       path: "examples/cifar10/cifar10_train_shard"
-      batchsize: 16
+      batchsize: 64
       random_skip: 5000
     }
     exclude: kTest
@@ -57,7 +57,7 @@ neuralnet {
 
   layer {
     name: "conv1"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "rgb"
     convolution_conf {
       num_filters: 32
@@ -84,7 +84,7 @@ neuralnet {
 
   layer {
     name: "pool1"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv1"
     pooling_conf {
       pool: MAX
@@ -109,7 +109,7 @@ neuralnet {
   }
   layer {
     name: "conv2"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "norm1"
     convolution_conf {
       num_filters: 32
@@ -140,10 +140,10 @@ neuralnet {
   }
   layer {
     name: "pool2"
-    type: kPooling
+    type: kCPooling
     srclayers: "relu2"
     pooling_conf {
-      pool: AVE
+      pool: AVG
       kernel: 3
       stride: 2
     }
@@ -160,7 +160,7 @@ neuralnet {
   }
   layer {
     name: "conv3"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "norm2"
     convolution_conf {
       num_filters: 64
@@ -190,10 +190,10 @@ neuralnet {
   }
   layer {
     name: "pool3"
-    type: kPooling
+    type: kCPooling
     srclayers: "relu3"
     pooling_conf {
-      pool: AVE
+      pool: AVG
       kernel: 3
       stride: 2
     }
@@ -237,5 +237,7 @@ neuralnet {
 cluster {
   nworker_groups: 1
   nserver_groups: 1
+  nworkers_per_group: 1
+  nworkers_per_procs: 1
   workspace: "examples/cifar10"
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/examples/mnist/conv.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/conv.conf b/examples/mnist/conv.conf
index aaf34f2..7f7a158 100644
--- a/examples/mnist/conv.conf
+++ b/examples/mnist/conv.conf
@@ -57,7 +57,7 @@ neuralnet {
   }
   layer {
     name: "conv1"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "mnist"
     convolution_conf {
       num_filters: 20
@@ -81,7 +81,7 @@ neuralnet {
   }
   layer {
     name: "pool1"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv1"
     pooling_conf {
       pool: MAX
@@ -91,7 +91,7 @@ neuralnet {
   }
   layer {
     name: "conv2"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "pool1"
     convolution_conf {
       num_filters: 50
@@ -115,7 +115,7 @@ neuralnet {
   }
   layer {
     name: "pool2"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv2"
     pooling_conf {
       pool: MAX

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/include/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h
index e5663d8..dd45eec 100644
--- a/include/neuralnet/neuron_layer.h
+++ b/include/neuralnet/neuron_layer.h
@@ -36,6 +36,15 @@ class ConvolutionLayer : public NeuronLayer {
   Blob<float> col_data_, col_grad_;
 };
 
+/**
+ * Use im2col from Caffe
+ */
+class CConvolutionLayer : public ConvolutionLayer {
+ public:
+  void ComputeFeature(int flag, Metric* perf) override;
+  void ComputeGradient(int flag, Metric* perf) override;
+};
+
 class DropoutLayer : public NeuronLayer {
  public:
   void Setup(const LayerProto& proto, int npartitions) override;
@@ -85,6 +94,18 @@ class PoolingLayer : public NeuronLayer {
   PoolingProto_PoolMethod pool_;
 };
 
+/**
+ * Use book-keeping for BP following Caffe's pooling implementation
+ */
+class CPoolingLayer : public PoolingLayer {
+ public:
+  void Setup(const LayerProto& proto, int npartitions);
+  void ComputeFeature(int flag, Metric *perf) override;
+  void ComputeGradient(int flag, Metric* perf) override;
+ private:
+  Blob<float> mask_;
+};
+
 class ReLULayer : public NeuronLayer {
  public:
   void Setup(const LayerProto& proto, int npartitions) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/include/utils/common.h
----------------------------------------------------------------------
diff --git a/include/utils/common.h b/include/utils/common.h
index 868047a..599424d 100644
--- a/include/utils/common.h
+++ b/include/utils/common.h
@@ -14,14 +14,7 @@ namespace singa {
 std::string IntVecToString(const std::vector<int>& vec);
 std::string VStringPrintf(std::string fmt, va_list l);
 std::string StringPrintf(std::string fmt, ...);
-void ReadProtoFromTextFile(const char* filename,
-                           google::protobuf::Message* proto);
-void WriteProtoToTextFile(const google::protobuf::Message& proto,
-                          const char* filename);
-void ReadProtoFromBinaryFile(const char* filename,
-                             google::protobuf::Message* proto);
-void WriteProtoToBinaryFile(const google::protobuf::Message& proto,
-                            const char* filename);
+
 /**
  * Locate the position of the arg in arglist.
  *
@@ -102,6 +95,39 @@ class Metric {
   std::unordered_map<std::string, std::pair<int, float>> entry_;
 };
 
+using google::protobuf::Message;
+void Im2col(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_col);
+void Col2im(const float* data_col, const int channels,
+    const int height, const int width, const int patch_h, const int patch_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_im);
+void ForwardMaxPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top, float* mask);
+void BackwardMaxPooling(const float* top, const float* mask, const int num,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    float* bottom);
+void ForwardAvgPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top);
+void BackwardAvgPooling(const float* top, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* bottom);
+
+void ReadProtoFromTextFile(const char* filename, Message* proto);
+void WriteProtoToTextFile(const Message& proto, const char* filename);
+void ReadProtoFromBinaryFile(const char* filename, Message* proto);
+void WriteProtoToBinaryFile(const Message& proto, const char* filename);
+
+
 }  // namespace singa
 
 #endif  // SINGA_UTILS_COMMON_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index f017f45..a891a08 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -32,6 +32,8 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
   RegisterLayer<BridgeSrcLayer, int>(kBridgeSrc);
   RegisterLayer<ConvolutionLayer, int>(kConvolution);
+  RegisterLayer<CConvolutionLayer, int>(kCConvolution);
+  RegisterLayer<CPoolingLayer, int>(kCPooling);
   RegisterLayer<ConcateLayer, int>(kConcate);
   RegisterLayer<DropoutLayer, int>(kDropout);
   RegisterLayer<EuclideanLossLayer, int>(kEuclideanLoss);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/neuralnet/neuron_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer.cc b/src/neuralnet/neuron_layer.cc
index b86d7da..edfa022 100644
--- a/src/neuralnet/neuron_layer.cc
+++ b/src/neuralnet/neuron_layer.cc
@@ -134,6 +134,49 @@ void ConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
   }
 }
 
+/******************* Implementation for CConvolutionLayer *********/
+void CConvolutionLayer::ComputeFeature(int flag, Metric* perf) {
+  auto src = Tensor4(srclayers_[0]->mutable_data(this));
+  auto data = Tensor3(&data_);
+  auto col = Tensor2(&col_data_);
+  auto weight = Tensor2(weight_->mutable_data());
+  auto bias = Tensor1(bias_->mutable_data());
+
+  for (int n = 0; n < batchsize_; n++) {
+    Im2col(src[n].dptr, channels_, height_, width_,
+        kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
+    data[n] = dot(weight, col);
+  }
+  data += expr::broadcast<1>(bias, data.shape);
+}
+
+void CConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
+  auto src = Tensor4(srclayers_[0]->mutable_data(this));
+  auto col = Tensor2(&col_data_);
+  auto weight = Tensor2(weight_->mutable_data());
+
+  auto grad = Tensor3(&grad_);
+  auto gcol = Tensor2(&col_grad_);
+  auto gweight = Tensor2(weight_->mutable_grad());
+  auto gbias = Tensor1(bias_->mutable_grad());
+  gweight = 0.f;
+  Blob<float>* gsrcblob = srclayers_[0]->mutable_grad(this);
+  Tensor<cpu, 4> gsrc(nullptr, Shape4(batchsize_, channels_, height_, width_));
+  if (gsrcblob != nullptr)
+    gsrc.dptr = gsrcblob->mutable_cpu_data();
+  gbias = expr::sumall_except_dim<1>(grad);
+  for(int n = 0; n < batchsize_; n++) {
+    Im2col(src[n].dptr, channels_, height_, width_,
+        kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
+    gweight += dot(grad[n], col.T());
+    if (gsrcblob != nullptr) {
+      gcol = dot(weight.T(), grad[n]);
+      Col2im(gcol.dptr, channels_, height_, width_,
+          kernel_, kernel_, pad_, pad_, stride_, stride_, gsrc[n].dptr);
+    }
+  }
+}
+
 /****************** Implementation for DropoutLayer ***********************/
 void DropoutLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
@@ -430,7 +473,7 @@ void PoolingLayer::Setup(const LayerProto& proto, int npartitions) {
   stride_ = pool_conf.stride();
   CHECK_LT(pad_, kernel_);
   pool_ = proto.pooling_conf().pool();
-  CHECK(pool_ == PoolingProto_PoolMethod_AVE
+  CHECK(pool_ == PoolingProto_PoolMethod_AVG
         || pool_ == PoolingProto_PoolMethod_MAX)
         << "Padding implemented only for average and max pooling.";
   const auto& srcshape = srclayers_[0]->data(this).shape();
@@ -455,7 +498,7 @@ void PoolingLayer::ComputeFeature(int flag, Metric* perf) {
   auto data = Tensor4(&data_);
   if (pool_ == PoolingProto_PoolMethod_MAX)
     data = expr::pool<red::maximum>(src, kernel_, stride_);
-  else if (pool_ == PoolingProto_PoolMethod_AVE)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     data = expr::pool<red::sum>(src, kernel_, stride_)
       * (1.0f / (kernel_ * kernel_));
 }
@@ -471,11 +514,44 @@ void PoolingLayer::ComputeGradient(int flag, Metric* perf) {
   auto grad = Tensor4(&grad_);
   if (pool_ == PoolingProto_PoolMethod_MAX)
     gsrc = expr::unpool<red::maximum>(src, data, grad, kernel_, stride_);
-  else if (pool_ == PoolingProto_PoolMethod_AVE)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     gsrc = expr::unpool<red::sum>(src, data, grad, kernel_, stride_)
            * (1.0f / (kernel_ * kernel_));
 }
 
+/***************** Implementation of CPoolingLayer ***************/
+
+void CPoolingLayer::Setup(const LayerProto& proto, int npartitions) {
+  PoolingLayer::Setup(proto, npartitions);
+  if(pool_ == PoolingProto_PoolMethod_MAX)
+    mask_.ReshapeLike(data_);
+}
+void CPoolingLayer::ComputeFeature(int flag, Metric* perf) {
+  if(pool_ == PoolingProto_PoolMethod_MAX)
+    ForwardMaxPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
+        batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, data_.mutable_cpu_data(), mask_.mutable_cpu_data());
+  else if(pool_ == PoolingProto_PoolMethod_AVG)
+    ForwardAvgPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
+        batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, data_.mutable_cpu_data());
+  else
+    LOG(FATAL) << "unknown pooling method";
+}
+
+void CPoolingLayer::ComputeGradient(int flag, Metric* perf) {
+  if(pool_ == PoolingProto_PoolMethod_MAX)
+    BackwardMaxPooling(grad_.cpu_data(), mask_.cpu_data(), batchsize_,
+        channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_,srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
+  else if(pool_ == PoolingProto_PoolMethod_AVG)
+    BackwardAvgPooling(grad_.cpu_data(), batchsize_,
+        channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_,srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
+  else
+    LOG(FATAL) << "unknown pooling method";
+}
+
 /***************** Implementation for ReLULayer *****************************/
 void ReLULayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 9adae6d..7861eae 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -389,7 +389,7 @@ message PoolingProto {
   required int32 kernel= 1;
   enum PoolMethod {
     MAX = 0;
-    AVE = 1;
+    AVG = 1;
   }
   // The pooling method
   optional PoolMethod pool = 30 [default = MAX];
@@ -514,6 +514,8 @@ enum LayerType {
   // Neuron layers
   //  - Feature transformation
   kConvolution = 1;
+  kCConvolution = 27;
+  kCPooling = 28;
   kDropout = 4;
   kInnerProduct = 5;
   kLRN = 6;
@@ -535,6 +537,7 @@ enum LayerType {
   kSlice = 12;
   kSplit = 13;
 
+
   // Indicate the user defined layer. Users should configure user_type
   kUserLayer = 102;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index 1d78c37..b6dc729 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -481,7 +481,6 @@ const vector<Msg*> Trainer::HandleUpdate(ParamEntry *entry, Msg** msg) {
         mshadow::Tensor<mshadow::cpu,1> grad((*it)->mutable_cpu_grad(), shape);
         sum += grad;
       }
-      sum /= entry->num_total;
     }
     int step = (*msg)->trgt_version();
     GenMsgs(kUpdate, step, entry, *msg, &ret);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/utils/blob.cc
----------------------------------------------------------------------
diff --git a/src/utils/blob.cc b/src/utils/blob.cc
index fd402a8..3df1aef 100644
--- a/src/utils/blob.cc
+++ b/src/utils/blob.cc
@@ -1,5 +1,5 @@
 /**
- * The code is adapted from that of Caffe whose license is attached.
+ * The code is adapted from Caffe whose license is attached.
  *
  * COPYRIGHT
  * All contributions by the University of California:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index d13faea..3c3dc39 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -1,32 +1,70 @@
+/**
+ * Some of the functions in this file are adapted from Caffe, whose license
+ * is attached.
+ *
+ * COPYRIGHT
+ * All contributions by the University of California:
+ * Copyright (c) 2014, The Regents of the University of California (Regents)
+ * All rights reserved.
+ * All other contributions:
+ * Copyright (c) 2014, the respective contributors
+ * All rights reserved.
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ * LICENSE
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * CONTRIBUTION AGREEMENT
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ */
 #include "utils/common.h"
 
-#include <arpa/inet.h>
-#include <fcntl.h>
-#include <glog/logging.h>
-#include <google/protobuf/io/coded_stream.h>
-#include <google/protobuf/io/zero_copy_stream_impl.h>
-#include <google/protobuf/text_format.h>
-#include <stdarg.h>
-#include <stdio.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+
 #include <netinet/in.h>
 #include <net/if.h>
+#include <arpa/inet.h>
+
+#include <stdarg.h>
+#include <stdio.h>
 #include <time.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <cfloat>
+
+#include <glog/logging.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
 
 namespace singa {
 
 using std::string;
 using std::vector;
-using google::protobuf::io::CodedInputStream;
-using google::protobuf::io::FileInputStream;
-using google::protobuf::io::FileOutputStream;
-using google::protobuf::io::ZeroCopyInputStream;
-using google::protobuf::Message;
-
 const int kBufLen = 1024;
 
 string IntVecToString(const vector<int>& vec) {
@@ -56,42 +94,7 @@ string StringPrintf(string fmt, ...) {
   return result;
 }
 
-// the proto related functions are from Caffe.
-void ReadProtoFromTextFile(const char* filename, Message* proto) {
-  int fd = open(filename, O_RDONLY);
-  CHECK_NE(fd, -1) << "File not found: " << filename;
-  FileInputStream* input = new FileInputStream(fd);
-  CHECK(google::protobuf::TextFormat::Parse(input, proto));
-  delete input;
-  close(fd);
-}
 
-void WriteProtoToTextFile(const Message& proto, const char* filename) {
-  int fd = open(filename, O_WRONLY | O_CREAT, 0644);
-  FileOutputStream* output = new FileOutputStream(fd);
-  CHECK(google::protobuf::TextFormat::Print(proto, output));
-  delete output;
-  close(fd);
-}
-
-void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
-  int fd = open(filename, O_RDONLY);
-  CHECK_NE(fd, -1) << "File not found: " << filename;
-  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
-  CodedInputStream* coded_input = new CodedInputStream(raw_input);
-  // upper limit 512MB, warning threshold 256MB
-  coded_input->SetTotalBytesLimit(536870912, 268435456);
-  CHECK(proto->ParseFromCodedStream(coded_input));
-  delete coded_input;
-  delete raw_input;
-  close(fd);
-}
-
-void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
-  int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
-  CHECK_NE(fd, -1) << "File cannot open: " << filename;
-  CHECK(proto.SerializeToFileDescriptor(fd));
-}
 
 int ArgPos(int argc, char** arglist, const char* arg) {
   for (int i = 0; i < argc; i++) {
@@ -293,4 +296,247 @@ void Metric::ParseFrom(const string& msg) {
   }
 }
 
+
+/************* The functions below are adapted from Caffe ************/
+using google::protobuf::io::CodedInputStream;
+using google::protobuf::io::FileInputStream;
+using google::protobuf::io::FileOutputStream;
+using google::protobuf::io::ZeroCopyInputStream;
+
+
+void Im2col(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_col) {
+  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+  int channels_col = channels * kernel_h * kernel_w;
+  for (int c = 0; c < channels_col; ++c) {
+    int w_offset = c % kernel_w;
+    int h_offset = (c / kernel_w) % kernel_h;
+    int c_im = c / kernel_h / kernel_w;
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
+        int h_pad = h * stride_h - pad_h + h_offset;
+        int w_pad = w * stride_w - pad_w + w_offset;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+          data_col[(c * height_col + h) * width_col + w] =
+            data_im[(c_im * height + h_pad) * width + w_pad];
+        else
+          data_col[(c * height_col + h) * width_col + w] = 0;
+      }
+    }
+  }
+}
+
+void Col2im(const float* data_col, const int channels,
+    const int height, const int width, const int patch_h, const int patch_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_im) {
+  memset(data_im, 0, height * width * channels * sizeof(float));
+  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
+  int channels_col = channels * patch_h * patch_w;
+  for (int c = 0; c < channels_col; ++c) {
+    int w_offset = c % patch_w;
+    int h_offset = (c / patch_w) % patch_h;
+    int c_im = c / patch_h / patch_w;
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
+        int h_pad = h * stride_h - pad_h + h_offset;
+        int w_pad = w * stride_w - pad_w + w_offset;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+          data_im[(c_im * height + h_pad) * width + w_pad] +=
+            data_col[(c * height_col + h) * width_col + w];
+      }
+    }
+  }
+}
+
+void ForwardMaxPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top, float* mask) {
+  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  int top_count = num * top_height * top_width * channels;
+  for (int i = 0; i < top_count; i++) {
+    mask[i] = -1;
+    top[i] = -FLT_MAX;
+  }
+  const int bottom_offset =  height * width;
+  const int top_offset = top_height * top_width;
+  // The main loop
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int ph = 0; ph < top_height; ++ph) {
+        for (int pw = 0; pw < top_width; ++pw) {
+          int hstart = ph * stride_h - pad_h;
+          int wstart = pw * stride_w - pad_w;
+          int hend = std::min(hstart + kernel_h, height);
+          int wend = std::min(wstart + kernel_w, width);
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              if (bottom[index] > top[top_index]) {
+                top[top_index] = bottom[index];
+                mask[top_index] = index;
+              }
+            }
+          }
+        }
+      }
+      // compute offset
+      bottom += bottom_offset;
+      top += top_offset;
+      mask += top_offset;
+    }
+  }
+}
+
+void BackwardMaxPooling(const float* top, const float* mask, const int num,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    float* bottom) {
+  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  const int top_offset = top_height * top_width;
+  const int bottom_offset = height * width;
+  memset(bottom, 0, sizeof(float) * num * channels * bottom_offset);
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int ph = 0; ph < top_height; ++ph) {
+        for (int pw = 0; pw < top_width; ++pw) {
+          const int top_idx = ph * top_width + pw;
+          const int bottom_idx = static_cast<int>(mask[top_idx]);
+          bottom[bottom_idx] += top[top_idx];
+        }
+      }
+      top += top_offset;
+      mask += top_offset;
+      bottom += bottom_offset;
+    }
+  }
+}
+
+void ForwardAvgPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top) {
+  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  int top_count = num * top_height * top_width * channels;
+  for (int i = 0; i < top_count; i++) {
+    top[i] = 0;
+  }
+  const int bottom_offset =  height * width;
+  const int top_offset = top_height * top_width;
+  // The main loop
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int ph = 0; ph < top_height; ++ph) {
+        for (int pw = 0; pw < top_width; ++pw) {
+          int hstart = ph * stride_h - pad_h;
+          int wstart = pw * stride_w - pad_w;
+          int hend = std::min(hstart + kernel_h, height+pad_h);
+          int wend = std::min(wstart + kernel_w, width+pad_w);
+          int pool_size = (hend-hstart) * (wend-wstart);
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          hend = std::min(hend, height);
+          wend = std::min(wend, width);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              top[top_index] += bottom[index];
+            }
+          }
+          top[top_index] /= pool_size;
+        }
+      }
+      // compute offset
+      bottom += bottom_offset;
+      top += top_offset;
+    }
+  }
+}
+
+void BackwardAvgPooling(const float* top, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* bottom) {
+  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  const int top_offset = top_height * top_width;
+  const int bottom_offset = height * width;
+  memset(bottom, 0, sizeof(float) * num * channels * bottom_offset);
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int ph = 0; ph < top_height; ++ph) {
+        for (int pw = 0; pw < top_width; ++pw) {
+          int hstart = ph * stride_h - pad_h;
+          int wstart = pw * stride_w - pad_w;
+          int hend = std::min(hstart + kernel_h, height+pad_h);
+          int wend = std::min(wstart + kernel_w, width+pad_w);
+          int pool_size = (hend-hstart) * (wend-wstart);
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          hend = std::min(hend, height);
+          wend = std::min(wend, width);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              bottom[index] += top[top_index] / pool_size;
+            }
+          }
+
+        }
+      }
+      top += top_offset;
+      bottom += bottom_offset;
+    }
+  }
+}
+
+void ReadProtoFromTextFile(const char* filename, Message* proto) {
+  int fd = open(filename, O_RDONLY);
+  CHECK_NE(fd, -1) << "File not found: " << filename;
+  FileInputStream* input = new FileInputStream(fd);
+  CHECK(google::protobuf::TextFormat::Parse(input, proto));
+  delete input;
+  close(fd);
+}
+
+void WriteProtoToTextFile(const Message& proto, const char* filename) {
+  int fd = open(filename, O_WRONLY | O_CREAT, 0644);
+  FileOutputStream* output = new FileOutputStream(fd);
+  CHECK(google::protobuf::TextFormat::Print(proto, output));
+  delete output;
+  close(fd);
+}
+
+void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
+  int fd = open(filename, O_RDONLY);
+  CHECK_NE(fd, -1) << "File not found: " << filename;
+  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+  CodedInputStream* coded_input = new CodedInputStream(raw_input);
+  // upper limit 512MB, warning threshold 256MB
+  coded_input->SetTotalBytesLimit(536870912, 268435456);
+  CHECK(proto->ParseFromCodedStream(coded_input));
+  delete coded_input;
+  delete raw_input;
+  close(fd);
+}
+
+void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
+  int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+  CHECK_NE(fd, -1) << "File cannot open: " << filename;
+  CHECK(proto.SerializeToFileDescriptor(fd));
+}
 }  // namespace singa


[2/3] incubator-singa git commit: SINGA-51 Improve the convolution and pooling operations

Posted by wa...@apache.org.
SINGA-51 Improve the convolution and pooling operations

merge with pull request #64


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e7691422
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e7691422
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e7691422

Branch: refs/heads/master
Commit: e7691422c1c09b3ebea5ff399be1724ec72a5c9d
Parents: 6d59eec 077d380
Author: wang sheng <wa...@gmail.com>
Authored: Mon Sep 14 12:11:56 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Mon Sep 14 12:11:56 2015 +0800

----------------------------------------------------------------------
 Makefile.am             |  69 +++++++++++++++++++++-----
 configure.ac            |  25 ++++++++++
 src/test/test_common.cc | 113 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+), 11 deletions(-)
----------------------------------------------------------------------



[3/3] incubator-singa git commit: SINGA-51 Improve the convolution and pooling operations

Posted by wa...@apache.org.
SINGA-51 Improve the convolution and pooling operations

minor changes
  - format test_common.cc
  - format include patterns in driver, layers:
    every .cc file now includes its own header file on the first line
    (see the short sketch after this list)
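
For concreteness, the resulting pattern (visible in the loss_layer.cc and
neuron_layer.cc diffs below) looks like this:

// src/neuralnet/loss_layer.cc -- the file's own header comes first, which also
// checks that the header is self-contained; third-party headers follow after a
// blank line.
#include "neuralnet/loss_layer.h"

#include <glog/logging.h>
#include "mshadow/tensor.h"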


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d5d817e1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d5d817e1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d5d817e1

Branch: refs/heads/master
Commit: d5d817e145b1b8d60d8ae3781c24aa0916f4ddf7
Parents: e769142
Author: wang sheng <wa...@gmail.com>
Authored: Mon Sep 14 16:57:10 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Mon Sep 14 16:57:10 2015 +0800

----------------------------------------------------------------------
 include/driver.h                     |   9 +-
 include/neuralnet/connection_layer.h |   7 +-
 include/neuralnet/input_layer.h      |   2 +
 include/neuralnet/layer.h            |   6 +-
 include/neuralnet/loss_layer.h       |   5 +-
 include/neuralnet/neuron_layer.h     |   4 +-
 include/neuralnet/output_layer.h     |   2 +
 src/driver.cc                        |   4 +-
 src/neuralnet/connection_layer.cc    |   6 +-
 src/neuralnet/input_layer.cc         |   7 +-
 src/neuralnet/layer.cc               |   2 +
 src/neuralnet/loss_layer.cc          |   7 +-
 src/neuralnet/neuron_layer.cc        |  21 ++--
 src/neuralnet/output_layer.cc        |   3 +-
 src/test/test_common.cc              | 169 +++++++++++++++---------------
 src/utils/common.cc                  |  17 ++-
 16 files changed, 142 insertions(+), 129 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/driver.h
----------------------------------------------------------------------
diff --git a/include/driver.h b/include/driver.h
index 5a9ddfc..b16cef3 100644
--- a/include/driver.h
+++ b/include/driver.h
@@ -1,5 +1,6 @@
 #ifndef SINGA_DRIVER_H_
 #define SINGA_DRIVER_H_
+
 #include "singa.h"
 
 namespace singa {
@@ -110,24 +111,28 @@ int Driver::RegisterParam(const Type& type) {
   factory->Register(type, CreateInstance(Subclass, Param));
   return 1;
 }
+
 template<typename Subclass, typename Type>
 int Driver::RegisterParamGenerator(const Type& type) {
   auto factory = Singleton<Factory<singa::ParamGenerator>>::Instance();
   factory->Register(type, CreateInstance(Subclass, ParamGenerator));
   return 1;
 }
+
 template<typename Subclass, typename Type>
 int Driver::RegisterUpdater(const Type& type) {
   auto factory = Singleton<Factory<singa::Updater>>::Instance();
   factory->Register(type, CreateInstance(Subclass, Updater));
   return 1;
 }
+
 template<typename Subclass, typename Type>
 int Driver::RegisterLRGenerator(const Type& type) {
   auto factory = Singleton<Factory<singa::LRGenerator>>::Instance();
   factory->Register(type, CreateInstance(Subclass, LRGenerator));
   return 1;
 }
+
 template<typename Subclass, typename Type>
 int Driver::RegisterWorker(const Type& type) {
   auto factory = Singleton<Factory<singa::Worker>>::Instance();
@@ -135,8 +140,6 @@ int Driver::RegisterWorker(const Type& type) {
   return 1;
 }
 
-
 }  // namespace singa
-#endif  // SINGA_DRIVER_H_
-
 
+#endif  // SINGA_DRIVER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/connection_layer.h b/include/neuralnet/connection_layer.h
index e44f4f3..233714d 100644
--- a/include/neuralnet/connection_layer.h
+++ b/include/neuralnet/connection_layer.h
@@ -1,5 +1,7 @@
 #ifndef SINGA_NEURALNET_CONNECTION_LAYER_H_
 #define SINGA_NEURALNET_CONNECTION_LAYER_H_
+
+#include <vector>
 #include "neuralnet/layer.h"
 
 /**
@@ -120,6 +122,7 @@ class SplitLayer : public ConnectionLayer {
  protected:
   Blob<float> grads_;
 };
-}
-//  namespace singa
+
+}  // namespace singa
+
 #endif  // SINGA_NEURALNET_CONNECTION_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/neuralnet/input_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/input_layer.h b/include/neuralnet/input_layer.h
index 62595c6..67af2eb 100644
--- a/include/neuralnet/input_layer.h
+++ b/include/neuralnet/input_layer.h
@@ -1,6 +1,7 @@
 #ifndef SINGA_NEURALNET_INPUT_LAYER_H_
 #define SINGA_NEURALNET_INPUT_LAYER_H_
 
+#include <string>
 #include <vector>
 #include "neuralnet/layer.h"
 #include "utils/data_shard.h"
@@ -165,6 +166,7 @@ class PrefetchLayer : public Layer {
  protected:
   std::thread thread_;
 };
+
 }  // namespace singa
 
 #endif  // SINGA_NEURALNET_INPUT_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index 56201f5..4f153d3 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -5,7 +5,6 @@
 #include <string>
 #include <thread>
 #include <vector>
-
 #include "proto/common.pb.h"
 #include "proto/job.pb.h"
 #include "utils/common.h"
@@ -13,8 +12,6 @@
 #include "utils/param.h"
 
 namespace singa {
-using std::vector;
-using std::string;
 
 /**
  * Base layer class.
@@ -207,10 +204,11 @@ class LossLayer : public Layer {
 };
 
 }  // namespace singa
+
 #include "neuralnet/connection_layer.h"
 #include "neuralnet/input_layer.h"
 #include "neuralnet/loss_layer.h"
 #include "neuralnet/neuron_layer.h"
 #include "neuralnet/output_layer.h"
 
-#endif  // SINGA_NEURALNET_BASE_LAYER_H_
+#endif  // SINGA_NEURALNET_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/neuralnet/loss_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/loss_layer.h b/include/neuralnet/loss_layer.h
index 8358bd6..c9f6681 100644
--- a/include/neuralnet/loss_layer.h
+++ b/include/neuralnet/loss_layer.h
@@ -41,6 +41,7 @@ class SoftmaxLossLayer : public LossLayer {
   float scale_;
   int topk_;
 };
-}
-//  namespace singa
+
+}  // namespace singa
+
 #endif  // SINGA_NEURALNET_LOSS_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h
index dd45eec..86b55a3 100644
--- a/include/neuralnet/neuron_layer.h
+++ b/include/neuralnet/neuron_layer.h
@@ -1,9 +1,10 @@
 #ifndef SINGA_NEURALNET_NEURON_LAYER_H_
 #define SINGA_NEURALNET_NEURON_LAYER_H_
-#include <vector>
 
+#include <vector>
 #include "neuralnet/layer.h"
 #include "proto/job.pb.h"
+
 /**
  * \file this file includes the declarations neuron layer classes that conduct
  * the transformation of features.
@@ -221,7 +222,6 @@ class RBMHidLayer: public RBMLayer {
   RBMLayer *vis_layer_;
 };
 
-
 }  // namespace singa
 
 #endif  // SINGA_NEURALNET_NEURON_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/include/neuralnet/output_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/output_layer.h b/include/neuralnet/output_layer.h
index c507e1c..ac83d00 100644
--- a/include/neuralnet/output_layer.h
+++ b/include/neuralnet/output_layer.h
@@ -1,4 +1,6 @@
 #ifndef SINGA_NEURALNET_OUTPUT_LAYER_H_
 #define SINGA_NEURALNET_OUTPUT_LAYER_H_
+
 // currently no output sub-classes are defined
+
 #endif  // SINGA_NEURALNET_OUTPUT_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index a891a08..88bf4aa 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -1,10 +1,8 @@
+#include "driver.h"
 
 #include <cblas.h>
 #include <glog/logging.h>
 #include <string>
-
-#include "singa.h"
-
 #include "utils/tinydir.h"
 
 namespace singa {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/neuralnet/connection_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer.cc b/src/neuralnet/connection_layer.cc
index a3d0a75..e247161 100644
--- a/src/neuralnet/connection_layer.cc
+++ b/src/neuralnet/connection_layer.cc
@@ -1,7 +1,9 @@
-#include <vector>
-#include "neuralnet/layer.h"
+#include "neuralnet/connection_layer.h"
 
 namespace singa {
+
+using std::vector;
+
 /************* Implementation for ConcateLayer ***********/
 void ConcateLayer::Setup(const LayerProto& proto, int npartitions) {
   // CHECK_EQ(npartitions, 1);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/neuralnet/input_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer.cc b/src/neuralnet/input_layer.cc
index b1c6986..f7167da 100644
--- a/src/neuralnet/input_layer.cc
+++ b/src/neuralnet/input_layer.cc
@@ -1,8 +1,7 @@
-#include <vector>
-#include <string>
+#include "neuralnet/input_layer.h"
 
-#include "neuralnet/layer.h"
 #include "mshadow/tensor.h"
+
 namespace singa {
 
 using namespace mshadow;
@@ -10,6 +9,8 @@ using mshadow::cpu;
 using mshadow::Shape4;
 using mshadow::Tensor;
 
+using std::string;
+using std::vector;
 
 /************* Implementation for ParserLayer ***********/
 void ParserLayer::ComputeFeature(int flag, Metric *perf) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index 7e2e107..f38d592 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -9,6 +9,8 @@
 
 namespace singa {
 
+using std::string;
+
 Layer* Layer::Create(const LayerProto& proto) {
   auto* factory = Singleton<Factory<Layer>>::Instance();
   Layer* layer = nullptr;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/neuralnet/loss_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/loss_layer.cc b/src/neuralnet/loss_layer.cc
index 118456a..f9b80a9 100644
--- a/src/neuralnet/loss_layer.cc
+++ b/src/neuralnet/loss_layer.cc
@@ -1,9 +1,10 @@
+#include "neuralnet/loss_layer.h"
+
 #include <glog/logging.h>
-#include "neuralnet/layer.h"
 #include "mshadow/tensor.h"
 
-
 namespace singa {
+
 using namespace mshadow;
 using mshadow::cpu;
 
@@ -14,6 +15,8 @@ using mshadow::Shape3;
 using mshadow::Shape4;
 using mshadow::Tensor;
 
+using std::string;
+using std::vector;
 
 /********** * Implementation for EuclideanLossLayer*************************/
 void EuclideanLossLayer::ComputeFeature(int flag, Metric* perf) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/neuralnet/neuron_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer.cc b/src/neuralnet/neuron_layer.cc
index edfa022..a98b40d 100644
--- a/src/neuralnet/neuron_layer.cc
+++ b/src/neuralnet/neuron_layer.cc
@@ -1,10 +1,11 @@
+#include "neuralnet/neuron_layer.h"
+
 #include <glog/logging.h>
 #include <algorithm>
-
-#include "neuralnet/layer.h"
 #include "utils/singleton.h"
 #include "mshadow/tensor.h"
 #include "mshadow/cxxnet_op.h"
+
 namespace singa {
 
 using namespace mshadow;
@@ -165,7 +166,7 @@ void CConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
   if (gsrcblob != nullptr)
     gsrc.dptr = gsrcblob->mutable_cpu_data();
   gbias = expr::sumall_except_dim<1>(grad);
-  for(int n = 0; n < batchsize_; n++) {
+  for (int n = 0; n < batchsize_; n++) {
     Im2col(src[n].dptr, channels_, height_, width_,
         kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
     gweight += dot(grad[n], col.T());
@@ -230,7 +231,7 @@ Blob<float>* RBMLayer::Sample(int flag) {
     &sample_ : &neg_sample_;
 }
 void RBMLayer::Setup(const LayerProto& proto, int npartitions) {
-  CHECK_EQ(npartitions, 1);  //  TODO test for npartitions > 1
+  CHECK_EQ(npartitions, 1);  // TODO(wangwei) test for npartitions > 1
   Layer::Setup(proto, npartitions);
   hdim_ = proto.rbm_conf().hdim();
   gaussian_ = proto.rbm_conf().gaussian();
@@ -523,15 +524,15 @@ void PoolingLayer::ComputeGradient(int flag, Metric* perf) {
 
 void CPoolingLayer::Setup(const LayerProto& proto, int npartitions) {
   PoolingLayer::Setup(proto, npartitions);
-  if(pool_ == PoolingProto_PoolMethod_MAX)
-    mask_.ReshapeLike(data_);
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+      mask_.ReshapeLike(data_);
 }
 void CPoolingLayer::ComputeFeature(int flag, Metric* perf) {
-  if(pool_ == PoolingProto_PoolMethod_MAX)
+  if (pool_ == PoolingProto_PoolMethod_MAX)
     ForwardMaxPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
         batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
         stride_, stride_, data_.mutable_cpu_data(), mask_.mutable_cpu_data());
-  else if(pool_ == PoolingProto_PoolMethod_AVG)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     ForwardAvgPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
         batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
         stride_, stride_, data_.mutable_cpu_data());
@@ -540,11 +541,11 @@ void CPoolingLayer::ComputeFeature(int flag, Metric* perf) {
 }
 
 void CPoolingLayer::ComputeGradient(int flag, Metric* perf) {
-  if(pool_ == PoolingProto_PoolMethod_MAX)
+  if (pool_ == PoolingProto_PoolMethod_MAX)
     BackwardMaxPooling(grad_.cpu_data(), mask_.cpu_data(), batchsize_,
         channels_, height_, width_, kernel_, kernel_, pad_, pad_,
         stride_, stride_,srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
-  else if(pool_ == PoolingProto_PoolMethod_AVG)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     BackwardAvgPooling(grad_.cpu_data(), batchsize_,
         channels_, height_, width_, kernel_, kernel_, pad_, pad_,
         stride_, stride_,srclayers_[0]->mutable_grad(this)->mutable_cpu_data());

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/neuralnet/output_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/output_layer.cc b/src/neuralnet/output_layer.cc
index dfc547b..535480e 100644
--- a/src/neuralnet/output_layer.cc
+++ b/src/neuralnet/output_layer.cc
@@ -2,5 +2,4 @@
 
 namespace singa {
 
-
-}
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/test/test_common.cc
----------------------------------------------------------------------
diff --git a/src/test/test_common.cc b/src/test/test_common.cc
index b84e860..e30c9cb 100644
--- a/src/test/test_common.cc
+++ b/src/test/test_common.cc
@@ -1,113 +1,112 @@
-#include "gtest/gtest.h"
-#include "utils/common.h"
-#include <unordered_map>
 #include <string>
+#include <unordered_map>
 #include <vector>
+#include "gtest/gtest.h"
+#include "utils/common.h"
 
 using std::string;
 using std::vector;
 using namespace singa;
 
 TEST(CommonTest, TestIntVecToString) {
-
-    vector<int> num_vec {2, 3, 5, 7, 11};
-    string str = "(2, 3, 5, 7, 11, )";
-    ASSERT_EQ(str, IntVecToString(num_vec));
+  vector<int> num_vec {2, 3, 5, 7, 11};
+  string str = "(2, 3, 5, 7, 11, )";
+  ASSERT_EQ(str, IntVecToString(num_vec));
 }
 
 TEST(CommonTest, TestStringPrintf) {
-    const char* str_a = "abc";
-    const char* str_b = "edfgh";
-    const char* str_c = " !@#";
-    const char* str_d = "1";
-    const char* str_e = "2";
-    const char* str_f = "3";
+  const char* str_a = "abc";
+  const char* str_b = "edfgh";
+  const char* str_c = " !@#";
+  const char* str_d = "1";
+  const char* str_e = "2";
+  const char* str_f = "3";
 
-    string fmt_a = "%s%s%s";
-    string fmt_b = "[%s] [%s] [%s] ";
+  string fmt_a = "%s%s%s";
+  string fmt_b = "[%s] [%s] [%s] ";
 
-    string str_d_a = "abcedfgh !@#";
-    string str_d_b = "[1] [2] [3] ";
+  string str_d_a = "abcedfgh !@#";
+  string str_d_b = "[1] [2] [3] ";
 
-    ASSERT_EQ(str_d_a, StringPrintf(fmt_a, str_a, str_b, str_c));
-    ASSERT_EQ(str_d_b, StringPrintf(fmt_b, str_d, str_e, str_f));
+  ASSERT_EQ(str_d_a, StringPrintf(fmt_a, str_a, str_b, str_c));
+  ASSERT_EQ(str_d_b, StringPrintf(fmt_b, str_d, str_e, str_f));
 }
 
 TEST(CommonTest, TestGCDLCM) {
-    int a = 2, b = 5, c = 10, d = 15;
+  int a = 2, b = 5, c = 10, d = 15;
 
-    ASSERT_EQ(1, gcd(a, b));
-    ASSERT_EQ(5, gcd(c, d));
-    ASSERT_EQ(10, LeastCommonMultiple(b, c));
-    ASSERT_EQ(30, LeastCommonMultiple(c, d));
+  ASSERT_EQ(1, gcd(a, b));
+  ASSERT_EQ(5, gcd(c, d));
+  ASSERT_EQ(10, LeastCommonMultiple(b, c));
+  ASSERT_EQ(30, LeastCommonMultiple(c, d));
 }
 
 TEST(CommonTest, TestMetric) {
-    string str, msg;
-    Metric metric;
-    metric.Add("a", 0.5);
-    metric.Add("b", 0.5);
-    metric.Add("a", 1.5);
-    str = metric.ToLogString();
-    msg = metric.ToString();
-    metric.Reset();
-    metric.ParseFrom(msg);
-    ASSERT_EQ(str, metric.ToLogString());
+  string str, msg;
+  Metric metric;
+  metric.Add("a", 0.5);
+  metric.Add("b", 0.5);
+  metric.Add("a", 1.5);
+  str = metric.ToLogString();
+  msg = metric.ToString();
+  metric.Reset();
+  metric.ParseFrom(msg);
+  ASSERT_EQ(str, metric.ToLogString());
 }
 
 TEST(CommonTest, TestSlice) {
-    vector<vector<int>> slices_0;
-    vector<int> sizes {14112, 96, 256, 884736, 384};
-    ASSERT_EQ(slices_0, Slice(0, sizes));
-    
-    vector<vector<int>> slices_1 {
-        { 14112 },
-        { 96 },
-        { 256 },
-        { 884736 },
-        { 384 },
-    };
-    
-    vector<vector<int>> slices_2 {
-        { 14112 },
-        { 96 },
-        { 256 },
-        { 435328, 449408 },
-        { 384 },
-    };
-        
-    vector<vector<int>> slices_4 {
-        { 14112 },
-        { 96 },
-        { 256 },
-        { 210432,224896,224896,224512 },
-        { 384 },
-    };
-    
-    vector<vector<int>> slices_8 {
-        { 14112 },
-        { 96 },
-        { 256 },
-        { 97984,112448,112448,112448,112448,112448,112448,112064 },
-        { 384 },
-    };
-    
-    ASSERT_EQ(slices_1, Slice(1, sizes));
-    ASSERT_EQ(slices_2, Slice(2, sizes));
-    ASSERT_EQ(slices_4, Slice(4, sizes));
-    ASSERT_EQ(slices_8, Slice(8, sizes));
+  vector<vector<int>> slices_0;
+  vector<int> sizes {14112, 96, 256, 884736, 384};
+  ASSERT_EQ(slices_0, Slice(0, sizes));
+
+  vector<vector<int>> slices_1 {
+    {14112},
+    {96},
+    {256},
+    {884736},
+    {384},
+  };
+
+  vector<vector<int>> slices_2 {
+    {14112},
+    {96},
+    {256},
+    {435328, 449408},
+    {384},
+  };
+
+  vector<vector<int>> slices_4 {
+    {14112},
+    {96},
+    {256},
+    {210432, 224896, 224896, 224512},
+    {384},
+  };
+
+  vector<vector<int>> slices_8 {
+    {14112},
+    {96},
+    {256},
+    {97984, 112448, 112448, 112448, 112448, 112448, 112448, 112064},
+    {384},
+  };
+
+  ASSERT_EQ(slices_1, Slice(1, sizes));
+  ASSERT_EQ(slices_2, Slice(2, sizes));
+  ASSERT_EQ(slices_4, Slice(4, sizes));
+  ASSERT_EQ(slices_8, Slice(8, sizes));
 }
 
 TEST(CommonTest, TestPartitionSlices) {
-    vector<int> slices {
-         97984,112448,112448,112448,112448,112448,112448,112064
-    };
-    vector<int> box_1 { 0, 0, 0, 0, 0, 0, 0, 0 };
-    vector<int> box_2 { 0, 0, 0, 0, 1, 1, 1, 1 };
-    vector<int> box_4 { 0, 0, 1, 1, 2, 2, 3, 3 };
-    vector<int> box_8 { 0, 1, 2, 3, 4, 5, 6, 7 };
-    ASSERT_EQ(box_1, PartitionSlices(1, slices));
-    ASSERT_EQ(box_2, PartitionSlices(2, slices));
-    ASSERT_EQ(box_4, PartitionSlices(4, slices));
-    ASSERT_EQ(box_8, PartitionSlices(8, slices));
+  vector<int> slices {
+    97984, 112448, 112448, 112448, 112448, 112448, 112448, 112064
+  };
+  vector<int> box_1 {0, 0, 0, 0, 0, 0, 0, 0};
+  vector<int> box_2 {0, 0, 0, 0, 1, 1, 1, 1};
+  vector<int> box_4 {0, 0, 1, 1, 2, 2, 3, 3};
+  vector<int> box_8 {0, 1, 2, 3, 4, 5, 6, 7};
+  ASSERT_EQ(box_1, PartitionSlices(1, slices));
+  ASSERT_EQ(box_2, PartitionSlices(2, slices));
+  ASSERT_EQ(box_4, PartitionSlices(4, slices));
+  ASSERT_EQ(box_8, PartitionSlices(8, slices));
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d5d817e1/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index 3c3dc39..4cf9a89 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -357,8 +357,8 @@ void ForwardMaxPooling(const float* bottom, const int num, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h, const int stride_w,
     float* top, float* mask) {
-  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
-  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  int top_height = (height + pad_h * 2 -kernel_h) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w) / stride_w + 1;
   int top_count = num * top_height * top_width * channels;
   for (int i = 0; i < top_count; i++) {
     mask[i] = -1;
@@ -402,8 +402,8 @@ void BackwardMaxPooling(const float* top, const float* mask, const int num,
     const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
     const int stride_h, const int stride_w,
     float* bottom) {
-  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
-  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  int top_height = (height + pad_h * 2 -kernel_h) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w) / stride_w + 1;
   const int top_offset = top_height * top_width;
   const int bottom_offset = height * width;
   memset(bottom, 0, sizeof(float) * num * channels * bottom_offset);
@@ -427,8 +427,8 @@ void ForwardAvgPooling(const float* bottom, const int num, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h, const int stride_w,
     float* top) {
-  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
-  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  int top_height = (height + pad_h * 2 -kernel_h) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w) / stride_w + 1;
   int top_count = num * top_height * top_width * channels;
   for (int i = 0; i < top_count; i++) {
     top[i] = 0;
@@ -470,8 +470,8 @@ void BackwardAvgPooling(const float* top, const int num, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h, const int stride_w,
     float* bottom) {
-  int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1;
-  int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1;
+  int top_height = (height + pad_h * 2 -kernel_h) / stride_h + 1;
+  int top_width = (width + pad_w * 2 -kernel_w) / stride_w + 1;
   const int top_offset = top_height * top_width;
   const int bottom_offset = height * width;
   memset(bottom, 0, sizeof(float) * num * channels * bottom_offset);
@@ -495,7 +495,6 @@ void BackwardAvgPooling(const float* top, const int num, const int channels,
               bottom[index] += top[top_index] / pool_size;
             }
           }
-
         }
       }
       top += top_offset;