You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/09/04 12:05:15 UTC

[7/7] incubator-singa git commit: SINGA-21 Code review 4

SINGA-21 Code review 4

Update layers for RBM.
The CD algorithm follows Hinton's science paper to do sampling (only
hidden layer is sampled).
May add configuration fields to control the sampling of each layer.
Note. The first Gibbs iteration samples the positive data of the hidden
layer (not the negative data, which is uninitialized).


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ae203036
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ae203036
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ae203036

Branch: refs/heads/master
Commit: ae20303620a952033be67cc005765dd985fe5fe2
Parents: 63adbda
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Thu Sep 3 16:10:21 2015 +0800
Committer: wangwei <wa...@comp.nus.edu.sg>
Committed: Fri Sep 4 15:32:04 2015 +0800

----------------------------------------------------------------------
 README.md                        |  15 ++---
 examples/rbm/rbm0.conf           |  31 ++++++----
 examples/rbm/rbm1.conf           |  32 ++++++----
 examples/rbm/rbm2.conf           |  31 ++++++----
 examples/rbm/rbm3.conf           |  30 +++++----
 include/neuralnet/neuron_layer.h |  10 +--
 src/neuralnet/neuron_layer.cc    | 111 ++++++++++++++++++----------------
 src/proto/job.proto              |   6 +-
 8 files changed, 148 insertions(+), 118 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 449e624..f1f490b 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,8 @@
-##Apache SINGA
-
-Distributed deep learning system
-
-[Project Website](http://singa.incubator.apache.org)
-
-All the details can be found in project website.
+
+##Apache SINGA
+
+Distributed deep learning system
+
+[Project Website](http://singa.incubator.apache.org)
+
+All the details can be found in project website.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm0.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm0.conf b/examples/rbm/rbm0.conf
index a7e503b..dba4f37 100644
--- a/examples/rbm/rbm0.conf
+++ b/examples/rbm/rbm0.conf
@@ -1,14 +1,14 @@
 name: "rbm0"
 train_steps: 6000
 test_steps:100
-test_freq:100
+test_freq:500
 disp_freq: 100
 train_one_batch{
   alg: kCD
 }
 updater{
   type: kSGD
-  momentum: 0.9
+  momentum: 0.8
   weight_decay: 0.0002
   learning_rate{
     base_lr: 0.1
@@ -54,12 +54,21 @@ layer{
   type: kRBMVis
   srclayers:"mnist"
   srclayers:"RBMHid"
+  rbm_conf{
+    hdim: 1000
+  }
   param{
-    name: "w1_"
-    share_from: "w1"
+    name: "w1"
+    init{
+      type: kGaussian
+      mean: 0.0
+      std: 0.1
+    }
   }
+
   param{
     name: "rb11"
+    wd_scale: 0
     init{
       type: kConstant
       value: 0.0
@@ -71,19 +80,17 @@ layer{
   name: "RBMHid"
   type: kRBMHid
   srclayers:"RBMVis"
-  rbmhid_conf{
-    hid_dim: 1000
+  rbm_conf{
+    hdim: 1000
   }
   param{
-    name: "w1"
-    init{
-      type: kGaussian
-      mean: 0.0
-      std: 0.1
-    }
+    name: "w1_"
+    share_from: "w1"
   }
+
   param{
     name: "rb12"
+    wd_scale: 0
     init{
       type: kConstant
       value: 0.0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm1.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm1.conf b/examples/rbm/rbm1.conf
index db27d3a..ac6c059 100644
--- a/examples/rbm/rbm1.conf
+++ b/examples/rbm/rbm1.conf
@@ -1,7 +1,7 @@
 name: "rbm1"
 train_steps: 6000
 test_steps:100
-test_freq:1000
+test_freq:500
 disp_freq: 100
 train_one_batch{
   alg: kCD
@@ -9,7 +9,7 @@ train_one_batch{
 checkpoint_path: "examples/rbm/rbm0/checkpoint/step6000-worker0.bin"
 updater{
   type: kSGD
-  momentum: 0.9
+  momentum: 0.8
   weight_decay: 0.0002
   learning_rate{
   base_lr: 0.1
@@ -76,12 +76,21 @@ layer{
   type: kRBMVis
   srclayers:"sigmoid1"
   srclayers:"RBMHid"
+  rbm_conf{
+    hdim: 500
+  }
   param{
-    name: "w2_"
-    share_from: "w2"
+    name: "w2"
+    init{
+      type: kGaussian
+      mean: 0.0
+      std: 0.1
+    }
   }
+
   param{
     name: "rb21"
+    wd_scale: 0
     init{
     type: kConstant
     value: 0.0
@@ -93,19 +102,16 @@ layer{
   name: "RBMHid"
   type: kRBMHid
   srclayers:"RBMVis"
-  rbmhid_conf{
-    hid_dim: 500
+  rbm_conf{
+    hdim: 500
   }
   param{
-    name: "w2"
-    init{
-      type: kGaussian
-      mean: 0.0
-      std: 0.1
-    }
+    name: "w2_"
+    share_from: "w2"
   }
-  param{
+ param{
     name: "rb22"
+    wd_scale: 0
     init{
       type: kConstant
       value: 0.0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm2.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm2.conf b/examples/rbm/rbm2.conf
index fd08907..96841ff 100644
--- a/examples/rbm/rbm2.conf
+++ b/examples/rbm/rbm2.conf
@@ -1,7 +1,7 @@
 name: "rbm2"
 train_steps: 6000
 test_steps:100
-test_freq:1000
+test_freq:500
 disp_freq: 100
 train_one_batch{
   alg: kCD
@@ -10,7 +10,7 @@ checkpoint_path: "examples/rbm/rbm1/checkpoint/step6000-worker0.bin"
 
 updater{
   type: kSGD
-  momentum: 0.9
+  momentum: 0.8
   weight_decay: 0.0002
   learning_rate{
     base_lr: 0.1
@@ -98,12 +98,21 @@ layer{
   type: kRBMVis
   srclayers:"sigmoid2"
   srclayers:"RBMHid"
+  rbm_conf{
+    hdim: 250
+  }
   param{
-    name: "w3_"
-    share_from: "w3"
+    name: "w3"
+    init{
+      type: kGaussian
+      mean: 0.0
+      std: 0.1
+    }
   }
+
   param{
     name: "rb31"
+    wd_scale: 0
     init{
     type: kConstant
     value: 0.0
@@ -115,19 +124,17 @@ layer{
   name: "RBMHid"
   type: kRBMHid
   srclayers:"RBMVis"
-  rbmhid_conf{
-    hid_dim: 250
+  rbm_conf{
+    hdim: 250
   }
   param{
-    name: "w3"
-    init{
-      type: kGaussian
-      mean: 0.0
-      std: 0.1
-    }
+    name: "w3_"
+    share_from: "w3"
   }
+
   param{
     name: "rb32"
+    wd_scale: 0
     init{
     type: kConstant
     value: 0.0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm3.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm3.conf b/examples/rbm/rbm3.conf
index fe7cc1f..fa60114 100644
--- a/examples/rbm/rbm3.conf
+++ b/examples/rbm/rbm3.conf
@@ -1,7 +1,7 @@
 name: "rbm3"
 train_steps: 6000
 test_steps: 100
-test_freq: 1000
+test_freq: 500
 disp_freq: 100
 train_one_batch{
   alg: kCD
@@ -9,7 +9,7 @@ train_one_batch{
 checkpoint_path: "examples/rbm/rbm2/checkpoint/step6000-worker0.bin"
 updater{
     type: kSGD
-    momentum: 0.9
+    momentum: 0.8
     weight_decay: 0.0002
     learning_rate{
       base_lr: 0.001
@@ -118,13 +118,20 @@ layer{
   type: kRBMVis
   srclayers:"sigmoid3"
   srclayers:"RBMHid"
+  rbm_conf{
+    hdim: 30
+  }
   param{
-    name: "w4_"
-    share_from: "w4"
-
+    name: "w4"
+    init{
+      type: kGaussian
+      mean: 0.0
+      std: 0.1
+    }
   }
   param{
     name: "rb41"
+    wd_scale: 0
     init{
     type: kConstant
     value: 0.0
@@ -136,20 +143,17 @@ layer{
   name: "RBMHid"
   type: kRBMHid
   srclayers:"RBMVis"
-  rbmhid_conf{
-    hid_dim: 30
+  rbm_conf{
+    hdim: 30
     gaussian: true
   }
   param{
-    name: "w4"
-    init{
-      type: kGaussian
-      mean: 0.0
-      std: 0.1
-    }
+    name: "w4_"
+    share_from: "w4"
   }
   param{
     name: "rb42"
+    wd_scale: 0
     init{
     type: kConstant
     value: 0.0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/include/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h
index ce88ced..e5663d8 100644
--- a/include/neuralnet/neuron_layer.h
+++ b/include/neuralnet/neuron_layer.h
@@ -143,6 +143,7 @@ class SigmoidLayer: public Layer {
 class RBMLayer: public Layer {
  public:
   virtual ~RBMLayer() {}
+  void Setup(const LayerProto& proto, int npartitions) override;
   const Blob<float>& neg_data(const Layer* layer) {
     return neg_data_;
   }
@@ -153,14 +154,17 @@ class RBMLayer: public Layer {
     std::vector<Param*> params{weight_, bias_};
     return params;
   }
-  virtual Blob<float>* Sample(int flat) = 0;
+  virtual Blob<float>* Sample(int flat);
 
  protected:
+  //! if true, sampling according to gaussian distribution
+  bool gaussian_;
   //! dimension of the hidden layer
   int hdim_;
   //! dimension of the visible layer
   int vdim_;
   int batchsize_;
+  bool first_gibbs_;
   Param* weight_, *bias_;
 
   Blob<float> neg_data_;
@@ -177,7 +181,6 @@ class RBMVisLayer: public RBMLayer {
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric* perf) override;
   void ComputeGradient(int flag, Metric* perf) override;
-  Blob<float>* Sample(int flat) override;
 
  private:
   RBMLayer* hid_layer_;
@@ -192,11 +195,8 @@ class RBMHidLayer: public RBMLayer {
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric* perf) override;
   void ComputeGradient(int flag, Metric* perf) override;
-  Blob<float>* Sample(int flat) override;
 
  private:
-  // whether use gaussian sampling
-  bool gaussian_;
   RBMLayer *vis_layer_;
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/src/neuralnet/neuron_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer.cc b/src/neuralnet/neuron_layer.cc
index 5feb14b..b86d7da 100644
--- a/src/neuralnet/neuron_layer.cc
+++ b/src/neuralnet/neuron_layer.cc
@@ -164,6 +164,35 @@ void DropoutLayer::ComputeGradient(int flag, Metric* perf)  {
   auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
   gsrc = grad * mask;
 }
+
+
+/**************** Implementation for RBMLayer********************/
+Blob<float>* RBMLayer::Sample(int flag) {
+  Tensor<cpu, 2> sample, data;
+  if ((flag & kPositive) == kPositive || first_gibbs_) {
+    data = Tensor2(&data_);
+    sample = Tensor2(&sample_);
+  } else {
+    data = Tensor2(&neg_data_);
+    sample = Tensor2(&neg_sample_);
+  }
+  auto random = TSingleton<Random<cpu>>::Instance();
+  if (gaussian_) {
+    random->SampleGaussian(sample, 0.0f, 1.0f);
+    sample += data;
+  } else {
+    random->SampleBinary(sample, data);
+  }
+  return (flag & kPositive) == kPositive || first_gibbs_ ?
+    &sample_ : &neg_sample_;
+}
+void RBMLayer::Setup(const LayerProto& proto, int npartitions) {
+  CHECK_EQ(npartitions, 1);  //  TODO test for npartitions > 1
+  Layer::Setup(proto, npartitions);
+  hdim_ = proto.rbm_conf().hdim();
+  gaussian_ = proto.rbm_conf().gaussian();
+  first_gibbs_ = true;
+}
 /**************** Implementation for RBMVisLayer********************/
 RBMVisLayer::~RBMVisLayer() {
   delete weight_;
@@ -171,7 +200,7 @@ RBMVisLayer::~RBMVisLayer() {
 }
 
 void RBMVisLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
+  RBMLayer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 2);
   hid_layer_ = nullptr;
   for (auto src : srclayers_) {
@@ -185,31 +214,23 @@ void RBMVisLayer::Setup(const LayerProto& proto, int npartitions) {
   input_layer_ = srclayers_[0] != hid_layer_ ? srclayers_[0]: srclayers_[1];
   const auto& src = input_layer_->data(this);
   batchsize_ = src.shape()[0];
-  data_.ReshapeLike(src);  // this is visible dimension
+  data_.ReshapeLike(src);
   neg_data_.ReshapeLike(data_);
   neg_sample_.ReshapeLike(data_);
+  vdim_ = src.count() / batchsize_;
   weight_ = Param::Create(proto.param(0));
+  weight_ ->Setup(vector<int>{hdim_, vdim_});
   bias_ = Param::Create(proto.param(1));
-  bias_->Setup(vector<int>{src.count() / batchsize_});
-}
-Blob<float>* RBMVisLayer::Sample(int flag) {
-  Tensor<cpu, 2> sample, data;
-  if ((flag & kPositive) == kPositive) {
-    LOG(FATAL) << "RBMVisLayer can not be sampled for positive flag";
-  } else {
-    data = Tensor2(&neg_data_);
-    sample = Tensor2(&neg_sample_);
-  }
-  auto random = TSingleton<Random<cpu>>::Instance();
-  random->SampleBinary(sample, data);
-  return &neg_sample_;
+  bias_->Setup(vector<int>{vdim_});
 }
+
 void RBMVisLayer::ComputeFeature(int flag, Metric* perf) {
-  if ((flag & kPositive) == kPositive) { /*positive flag*/
+  if ((flag & kPositive) == kPositive) {
     data_.CopyFrom(input_layer_->data(this), true);
-  } else if ((flag & kNegative) == kNegative) {   /*negative flag*/
-    auto hid_sample = Tensor2(hid_layer_->Sample(flag));
+    first_gibbs_ = true;
+  } else if ((flag & kNegative) == kNegative) {
     // fetch sampling results from hidden layer
+    auto hid_sample = Tensor2(hid_layer_->Sample(flag));
     auto data = Tensor2(&neg_data_);
     auto weight = Tensor2(weight_->mutable_data());
     auto bias = Tensor1(bias_->mutable_data());
@@ -224,15 +245,25 @@ void RBMVisLayer::ComputeFeature(int flag, Metric* perf) {
       }
       perf->Add("Squared Error", err / batchsize_);
     }
+    first_gibbs_ = false;
   }
 }
 
 void RBMVisLayer::ComputeGradient(int flag, Metric* perf) {
   auto vis_pos = Tensor2(&data_);
   auto vis_neg = Tensor2(&neg_data_);
-    auto gbias = Tensor1(bias_->mutable_grad());
+  auto hid_pos = Tensor2(hid_layer_->mutable_data(this));
+  auto hid_neg = Tensor2(hid_layer_->mutable_neg_data(this));
+
+  auto gbias = Tensor1(bias_->mutable_grad());
   gbias = expr::sum_rows(vis_neg);
   gbias -= expr::sum_rows(vis_pos);
+  gbias /= batchsize_;
+
+  auto gweight = Tensor2(weight_->mutable_grad());
+  gweight = dot(hid_neg.T(), vis_neg);
+  gweight -= dot(hid_pos.T(), vis_pos);
+  gweight /= batchsize_;
 }
 /**************** Implementation for RBMHidLayer********************/
 RBMHidLayer::~RBMHidLayer() {
@@ -242,54 +273,36 @@ RBMHidLayer::~RBMHidLayer() {
 
 void RBMHidLayer::Setup(const LayerProto& proto,
       int npartitions) {
-  Layer::Setup(proto, npartitions);
+  RBMLayer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
   const auto& src_data = srclayers_[0]->data(this);
   batchsize_ = src_data.shape()[0];
-  vdim_ = src_data.count()/batchsize_;
-  hdim_ = proto.rbmhid_conf().hid_dim();
-  gaussian_ = proto.rbmhid_conf().gaussian();
+  vdim_ = src_data.count() / batchsize_;
   data_.Reshape(vector<int>{batchsize_, hdim_});
   neg_data_.ReshapeLike(data_);
   sample_.ReshapeLike(data_);
   neg_sample_.ReshapeLike(data_);
   weight_ = Param::Create(proto.param(0));
+  weight_->Setup(vector<int>{hdim_, vdim_});
   bias_ = Param::Create(proto.param(1));
   bias_->Setup(vector<int>{hdim_});
-  weight_->Setup(vector<int>{hdim_, vdim_});
   vis_layer_ = static_cast<RBMVisLayer*> (srclayers_[0]);
 }
 
-Blob<float>* RBMHidLayer::Sample(int flag) {
-  Tensor<cpu, 2> sample, data;
-  if ((flag & kPositive) == kPositive) {
-    data = Tensor2(&data_);
-    sample = Tensor2(&sample_);
-  } else {
-    data = Tensor2(&neg_data_);
-    sample = Tensor2(&neg_sample_);
-  }
-  auto random = TSingleton<Random<cpu>>::Instance();
-  if (gaussian_) {  // first gibbs
-    random->SampleGaussian(sample, 0.0f, 1.0f);
-    sample += data;
-  } else {
-    random->SampleBinary(sample, data);
-  }
-  return (flag & kPositive) == kPositive ? &sample_ : &neg_sample_;
-}
-
 void RBMHidLayer::ComputeFeature(int flag, Metric* perf) {
   auto weight = Tensor2(weight_->mutable_data());
   auto bias = Tensor1(bias_->mutable_data());
 
   Tensor<cpu, 2> data, src;
-  if ((flag & kPositive) == kPositive) {  /*postive flag*/
+  if ((flag & kPositive) == kPositive) {
     data = Tensor2(&data_);
     src = Tensor2(vis_layer_->mutable_data(this));
+    first_gibbs_ = true;
   } else {
     data = Tensor2(&neg_data_);
-    src = Tensor2(vis_layer_->Sample(flag));
+    // Hinton's science paper does not sample the vis layer
+    src = Tensor2(vis_layer_->mutable_neg_data(this));
+    first_gibbs_ = false;
   }
   data = dot(src, weight.T());
   data += expr::repmat(bias, batchsize_);
@@ -301,18 +314,10 @@ void RBMHidLayer::ComputeFeature(int flag, Metric* perf) {
 void RBMHidLayer::ComputeGradient(int flag, Metric* perf) {
   auto hid_pos = Tensor2(&data_);
   auto hid_neg = Tensor2(&neg_data_);
-  auto vis_pos = Tensor2(vis_layer_->mutable_data(this));
-  auto vis_neg = Tensor2(vis_layer_->mutable_data(this));
-
   auto gbias = Tensor1(bias_->mutable_grad());
   gbias = expr::sum_rows(hid_neg);
   gbias -= expr::sum_rows(hid_pos);
   gbias /= batchsize_;
-
-  auto gweight = Tensor2(weight_->mutable_grad());
-  gweight = dot(hid_neg.T(), vis_neg);
-  gweight -= dot(hid_pos.T(), vis_pos);
-  gweight /= batchsize_;
 }
 /*********** Implementation for InnerProductLayer**********/
 InnerProductLayer::~InnerProductLayer() {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 6d45963..dcee10b 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -185,7 +185,7 @@ message LayerProto {
   // configuration for prefetch layer
   optional PrefetchProto prefetch_conf = 44;
   // configuration for rbmhid layer
-  optional RBMHidProto rbmhid_conf = 49;
+  optional RBMProto rbm_conf = 49;
   // configuration for rectified linear unit layer
   optional ReLUProto relu_conf = 38;
   // configuration for rgb image parser layer
@@ -357,8 +357,8 @@ message DropoutProto {
   optional float dropout_ratio = 30 [default = 0.5];
 }
 
-message RBMHidProto {
-  optional int32 hid_dim = 1; // The number of outputs for the layer
+message RBMProto {
+  required int32 hdim = 1; // The number of outputs for the layer
   optional bool bias_term = 2 [default = true]; // whether to have bias terms
   optional bool gaussian = 3 [default = false]; // use gaussian sampling or not
 }