Posted to commits@singa.apache.org by wa...@apache.org on 2015/08/28 11:28:47 UTC
[1/2] incubator-singa git commit: SINGA-9 Add Support for Restricted Boltzmann Machine (RBM) model
Repository: incubator-singa
Updated Branches:
refs/heads/master 6afa895b8 -> fbbcaafdb
SINGA-9 Add Support for Restricted Boltzmann Machine (RBM) model
This is to implement RBM in SINGA.
To train RBM models, the Contrastive Divergence (CD) algorithm is implemented.
We had already implemented a BPWorker to run the Back-Propagation algorithm. The CD algorithm is implemented in the same way,
with a CDWorker whose RunOneBatch function controls the logic of the CD algorithm, including the positive phase,
the negative phase and the gradient-computation phase (a minimal sketch of this control flow is given below). RBM layers differ from the layers of feed-forward neural networks,
hence new layers for RBM models are added.
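For readers skimming the patch below, the control flow described above can be summarised by the following minimal sketch. It is illustrative only: the member name train_net_ is an assumption (the phase functions in this patch take the network as a shared_ptr<NeuralNet> argument), and the method is declared as TrainOneBatch in include/trainer/worker.h (the description above calls it RunOneBatch); the authoritative code is in src/trainer/worker.cc further down.

  // Hedged sketch of one CD training iteration; see CDWorker in
  // include/trainer/worker.h and src/trainer/worker.cc for the real code.
  void CDWorker::TrainOneBatch(int step, Metric* perf) {
    // positive phase: clamp a data mini-batch and compute hidden activations
    PositivePhase(step, train_net_, perf);
    // negative phase: Gibbs sampling restricted to the RBMVis/RBMHid layers
    NegativePhase(step, train_net_, perf);
    // gradient phase: positive statistics minus negative statistics,
    // then update the shared weight and the two bias parameters
    GradientPhase(step, train_net_);
    // loss phase: report the reconstruction error for monitoring
    LossPhase(step, train_net_, perf);
  }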
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ef4de796
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ef4de796
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ef4de796
Branch: refs/heads/master
Commit: ef4de796303550b1f3e31fd2fddd9eb831db2b06
Parents: 6afa895
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Sun Aug 16 15:42:20 2015 +0800
Committer: zhaojing <zh...@comp.nus.edu.sg>
Committed: Thu Aug 20 16:56:03 2015 +0800
----------------------------------------------------------------------
examples/rbm/autoencoder.conf | 299 +++++++++++++++++++++++++++++++++++++
examples/rbm/rbm0.conf | 103 +++++++++++++
examples/rbm/rbm1.conf | 135 +++++++++++++++++
examples/rbm/rbm2.conf | 167 +++++++++++++++++++++
examples/rbm/rbm3.conf | 198 ++++++++++++++++++++++++
include/neuralnet/layer.h | 57 ++++++-
include/trainer/worker.h | 6 +-
src/driver.cc | 6 +-
src/neuralnet/layer.cc | 191 +++++++++++++++++------
src/proto/job.proto | 21 ++-
src/trainer/worker.cc | 22 ++-
11 files changed, 1133 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/autoencoder.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/autoencoder.conf b/examples/rbm/autoencoder.conf
new file mode 100644
index 0000000..9575323
--- /dev/null
+++ b/examples/rbm/autoencoder.conf
@@ -0,0 +1,299 @@
+name: "deep-big-simple-mlp"
+train_steps: 12200
+test_steps:100
+test_freq:100
+disp_freq:20
+checkpoint_after: 1000
+checkpoint_freq: 1000
+checkpoint_path: "examples/rbm/checkpoint/rbm0/checkpoint/step6000-worker0.bin"
+checkpoint_path: "examples/rbm/checkpoint/rbm1/checkpoint/step6000-worker0.bin"
+checkpoint_path: "examples/rbm/checkpoint/rbm2/checkpoint/step6000-worker0.bin"
+checkpoint_path: "examples/rbm/checkpoint/rbm3/checkpoint/step6000-worker0.bin"
+alg: kBP
+updater{
+ type: kAdaGrad
+ learning_rate{
+ base_lr: 0.01
+ type: kFixed
+ }
+}
+
+neuralnet {
+ layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_train_shard"
+ batchsize: 1000
+ }
+ exclude: kTest
+ }
+
+ layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_test_shard"
+ batchsize: 1000
+ }
+ exclude: kTrain
+ }
+
+ layer{
+ name:"mnist"
+ type: kMnist
+ srclayers: "data"
+ mnist_conf {
+ norm_a: 255
+ norm_b: 0
+ }
+ }
+
+ layer{
+ name: "label"
+ type: kLabel
+ srclayers: "data"
+ }
+
+ layer{
+ name: "fc1"
+ type: kInnerProduct
+ srclayers:"mnist"
+ innerproduct_conf{
+ num_output: 1000
+ }
+ param{
+ name: "w1"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb12"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid1"
+ type: kSigmoid
+ srclayers:"fc1"
+ }
+ layer{
+ name: "fc2"
+ type: kInnerProduct
+ srclayers:"sigmoid1"
+ innerproduct_conf{
+ num_output: 500
+ }
+ param{
+ name: "w2"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb22"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid2"
+ type: kSigmoid
+ srclayers:"fc2"
+ }
+
+ layer{
+ name: "fc3"
+ type: kInnerProduct
+ srclayers:"sigmoid2"
+ innerproduct_conf{
+ num_output: 250
+ }
+ param{
+ name: "w3"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb32"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid3"
+ type: kSigmoid
+ srclayers:"fc3"
+ }
+
+ layer{
+ name: "fc4"
+ type: kInnerProduct
+ srclayers:"sigmoid3"
+ innerproduct_conf{
+ num_output: 30
+ }
+ param{
+ name: "w4"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb42"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "fc5"
+ type: kInnerProduct
+ #srclayers:"sigmoid4"
+ srclayers:"fc4"
+ innerproduct_conf{
+ num_output: 250
+ transpose: true
+ }
+ param{
+ name: "w5"
+ share_from: "w4"
+ }
+ param{
+ name: "rb41"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid5"
+ type: kSigmoid
+ srclayers:"fc5"
+ }
+ layer{
+ name: "fc6"
+ type: kInnerProduct
+ srclayers:"sigmoid5"
+ innerproduct_conf{
+ num_output: 500
+ transpose: true
+ }
+ param{
+ name: "w6"
+ share_from: "w3"
+ }
+ param{
+ name: "rb31"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+
+ }
+
+ layer{
+ name: "sigmoid6"
+ type: kSigmoid
+ srclayers:"fc6"
+ }
+ layer{
+ name: "fc7"
+ type: kInnerProduct
+ srclayers:"sigmoid6"
+ innerproduct_conf{
+ num_output: 1000
+ transpose: true
+ }
+ param{
+ name: "w7"
+ share_from: "w2"
+ }
+ param{
+ name: "rb21"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+
+ }
+
+ layer{
+ name: "sigmoid7"
+ type: kSigmoid
+ srclayers:"fc7"
+ }
+ layer{
+ name: "fc8"
+ type: kInnerProduct
+ srclayers:"sigmoid7"
+ innerproduct_conf{
+ num_output: 784
+ transpose: true
+ }
+ param{
+ name: "w8"
+ share_from: "w1"
+ }
+ param{
+ name: "rb11"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+
+ }
+
+ layer{
+ name: "sigmoid8"
+ type: kSigmoid
+ srclayers:"fc8"
+ }
+
+ layer{
+ name: "loss"
+ type:kEuclideanLoss
+ srclayers:"sigmoid8"
+ srclayers:"mnist"
+ }
+}
+cluster {
+ nworker_groups: 1
+ nserver_groups: 1
+ workspace: "examples/rbm/checkpoint/autoencoder/"
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm0.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm0.conf b/examples/rbm/rbm0.conf
new file mode 100644
index 0000000..ef8653f
--- /dev/null
+++ b/examples/rbm/rbm0.conf
@@ -0,0 +1,103 @@
+name: "deep-big-simple-dbm"
+train_steps: 6000
+test_steps:100
+test_freq:100
+disp_freq: 100
+alg: kCD
+checkpoint_after: 500
+checkpoint_freq: 1000
+updater{
+ type: kSGD
+ momentum: 0.9
+ weight_decay: 0.0002
+ learning_rate{
+ base_lr: 0.1
+ type: kFixed
+ }
+}
+
+neuralnet {
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_train_shard"
+ batchsize: 100
+ }
+ exclude: kTest
+}
+
+
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_test_shard"
+ batchsize: 100
+ }
+ exclude: kTrain
+}
+
+
+layer{
+ name:"mnist"
+ type: kMnist
+ srclayers: "data"
+ mnist_conf {
+ norm_a: 255
+ norm_b: 0
+ }
+}
+
+layer{
+ name: "RBMVis"
+ type: kRBMVis
+ srclayers:"mnist"
+ srclayers:"RBMHid"
+ rbmvis_conf{
+ num_output: 1000
+ }
+ param{
+ name: "w1"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
+ }
+ param{
+ name: "rb11"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+
+layer{
+ name: "RBMHid"
+ type: kRBMHid
+ srclayers:"RBMVis"
+ rbmhid_conf{
+ hid_dim: 1000
+ }
+ param{
+ name: "w1_1"
+ share_from: "w1"
+ }
+ param{
+ name: "rb12"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+}
+cluster {
+ nworker_groups: 1
+ nserver_groups: 1
+ nservers_per_group: 1
+ nworkers_per_group: 1
+ workspace: "examples/rbm/checkpoint/rbm0/"
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm1.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm1.conf b/examples/rbm/rbm1.conf
new file mode 100644
index 0000000..f9b4974
--- /dev/null
+++ b/examples/rbm/rbm1.conf
@@ -0,0 +1,135 @@
+name: "deep-big-simple-dbm"
+train_steps: 6000
+test_steps:100
+test_freq:500
+disp_freq: 100
+alg: kCD
+checkpoint_after: 500
+checkpoint_freq: 1000
+checkpoint_path: "examples/rbm/checkpoint/rbm0/checkpoint/step6000-worker0.bin"
+updater{
+ type: kSGD
+ momentum: 0.9
+ weight_decay: 0.0002
+ learning_rate{
+ base_lr: 0.1
+ type: kFixed
+ }
+}
+
+neuralnet {
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_train_shard"
+ batchsize: 100
+ }
+ exclude: kTest
+}
+
+
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_test_shard"
+ batchsize: 100
+ }
+ exclude: kTrain
+}
+
+
+layer{
+ name:"mnist"
+ type: kMnist
+ srclayers: "data"
+ mnist_conf {
+ norm_a: 255
+ norm_b: 0
+ }
+}
+
+layer{
+ name: "fc1"
+ type: kInnerProduct
+ srclayers:"mnist"
+ innerproduct_conf{
+ num_output: 1000
+ }
+ param{
+ name: "w1"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb12"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid1"
+ type: kSigmoid
+ srclayers:"fc1"
+ }
+
+layer{
+ name: "RBMVis"
+ type: kRBMVis
+ srclayers:"sigmoid1"
+ srclayers:"RBMHid"
+ rbmvis_conf{
+ num_output: 500
+ }
+ param{
+ name: "w2"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
+ }
+ param{
+ name: "rb21"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+
+layer{
+ name: "RBMHid"
+ type: kRBMHid
+ srclayers:"RBMVis"
+ rbmhid_conf{
+ hid_dim: 500
+ }
+ param{
+ name: "w2_1"
+ share_from: "w2"
+ }
+ param{
+ name: "rb22"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+}
+cluster {
+ nworker_groups: 1
+ nserver_groups: 1
+ nservers_per_group: 1
+ nworkers_per_group: 1
+ workspace: "examples/rbm/checkpoint/rbm1/"
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm2.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm2.conf b/examples/rbm/rbm2.conf
new file mode 100644
index 0000000..6629481
--- /dev/null
+++ b/examples/rbm/rbm2.conf
@@ -0,0 +1,167 @@
+name: "deep-big-simple-dbm"
+train_steps: 6000
+test_steps:100
+test_freq:100
+disp_freq: 100
+alg: kCD
+checkpoint_after: 500
+checkpoint_freq: 1000
+checkpoint_path: "examples/rbm/checkpoint/rbm1/checkpoint/step6000-worker0.bin"
+
+updater{
+ type: kSGD
+ momentum: 0.9
+ weight_decay: 0.0002
+ learning_rate{
+ base_lr: 0.1
+ type: kFixed
+ }
+}
+
+
+neuralnet {
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_train_shard"
+ batchsize: 100
+ }
+ exclude: kTest
+}
+
+
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_test_shard"
+ batchsize: 100
+ }
+ exclude: kTrain
+}
+
+
+layer{
+ name:"mnist"
+ type: kMnist
+ srclayers: "data"
+ mnist_conf {
+ norm_a: 255
+ norm_b: 0
+ }
+}
+
+layer{
+ name: "fc1"
+ type: kInnerProduct
+ srclayers:"mnist"
+ innerproduct_conf{
+ num_output: 1000
+ }
+ param{
+ name: "w1"
+ init {
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb12"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid1"
+ type: kSigmoid
+ srclayers:"fc1"
+ }
+
+layer{
+ name: "fc2"
+ type: kInnerProduct
+ srclayers:"sigmoid1"
+ innerproduct_conf{
+ num_output: 500
+ }
+ param{
+ name: "w2"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb22"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid2"
+ type: kSigmoid
+ srclayers:"fc2"
+ }
+layer{
+ name: "RBMVis"
+ type: kRBMVis
+ srclayers:"sigmoid2"
+ srclayers:"RBMHid"
+ rbmvis_conf{
+ num_output: 250
+ }
+ param{
+ name: "w3"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
+ }
+ param{
+ name: "rb31"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+
+layer{
+ name: "RBMHid"
+ type: kRBMHid
+ srclayers:"RBMVis"
+ rbmhid_conf{
+ hid_dim: 250
+ }
+ param{
+ name: "w3_1"
+ share_from: "w3"
+ }
+ param{
+ name: "rb32"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+}
+cluster {
+ nworker_groups: 1
+ nserver_groups: 1
+ nservers_per_group: 1
+ nworkers_per_group: 1
+ workspace: "examples/rbm/checkpoint/rbm2/"
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm3.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm3.conf b/examples/rbm/rbm3.conf
new file mode 100644
index 0000000..482c5e7
--- /dev/null
+++ b/examples/rbm/rbm3.conf
@@ -0,0 +1,198 @@
+name: "deep-big-simple-dbm"
+train_steps: 6000
+test_steps: 100
+test_freq: 100
+disp_freq: 100
+alg: kCD
+checkpoint_after: 500
+checkpoint_freq: 1000
+checkpoint_path: "examples/rbm/checkpoint/rbm2/checkpoint/step6000-worker0.bin"
+updater{
+ type: kSGD
+ momentum: 0.9
+ weight_decay: 0.0002
+ learning_rate{
+ base_lr: 0.001
+ type: kFixed
+ }
+}
+
+neuralnet {
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_train_shard"
+ batchsize: 100
+ }
+ exclude: kTest
+}
+
+
+layer {
+ name: "data"
+ type: kShardData
+ sharddata_conf {
+ path: "examples/rbm/mnist_test_shard"
+ batchsize: 100
+ }
+ exclude: kTrain
+}
+
+
+layer{
+ name:"mnist"
+ type: kMnist
+ srclayers: "data"
+ mnist_conf {
+ norm_a: 255
+ norm_b: 0
+ }
+}
+
+layer{
+ name: "fc1"
+ type: kInnerProduct
+ srclayers:"mnist"
+ innerproduct_conf{
+ num_output: 1000
+ }
+ param{
+ name: "w1"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb12"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid1"
+ type: kSigmoid
+ srclayers:"fc1"
+ }
+
+layer{
+ name: "fc2"
+ type: kInnerProduct
+ srclayers:"sigmoid1"
+ innerproduct_conf{
+ num_output: 500
+ }
+ param{
+ name: "w2"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb22"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid2"
+ type: kSigmoid
+ srclayers:"fc2"
+ }
+
+layer{
+ name: "fc3"
+ type: kInnerProduct
+ srclayers:"sigmoid2"
+ innerproduct_conf{
+ num_output: 250
+ }
+ param{
+ name: "w3"
+ init{
+ type: kUniform
+ low:-0.05
+ high:0.05
+ }
+ }
+ param{
+ name: "rb32"
+ init{
+ type: kUniform
+ low: -0.05
+ high:0.05
+ }
+ }
+ }
+
+ layer{
+ name: "sigmoid3"
+ type: kSigmoid
+ srclayers:"fc3"
+ }
+
+layer{
+ name: "RBMVis"
+ type: kRBMVis
+ srclayers:"sigmoid3"
+ srclayers:"RBMHid"
+ rbmvis_conf{
+ num_output: 30
+ }
+ param{
+ name: "w4"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
+ }
+ param{
+ name: "rb41"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+
+layer{
+ name: "RBMHid"
+ type: kRBMHid
+ srclayers:"RBMVis"
+ rbmhid_conf{
+ hid_dim: 30
+ gaussian: true
+ }
+ param{
+ name: "w4_1"
+ share_from: "w4"
+ }
+ param{
+ name: "rb42"
+ init{
+ type: kConstant
+ value: 0.0
+ }
+ }
+}
+}
+cluster {
+ nworker_groups: 1
+ nserver_groups: 1
+ nservers_per_group: 1
+ nworkers_per_group: 1
+ workspace: "examples/rbm/checkpoint/rbm3/"
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index 118da56..b1fbbb0 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -69,8 +69,8 @@ class DropoutLayer: public Layer {
Blob<float> mask_;
};
/**
- * RBM visible layer
- */
+ * RBM visible layer
+ */
class RBMVisLayer: public Layer {
public:
using Layer::ComputeFeature;
@@ -108,6 +108,7 @@ class RBMVisLayer: public Layer {
~RBMVisLayer();
+
private:
//! dimension of the hidden layer
int hdim_;
@@ -127,8 +128,8 @@ class RBMVisLayer: public Layer {
// in order to implement Persistent Contrastive Divergence,
};
/**
- * RBM hidden layer
- */
+ * RBM hidden layer
+ */
class RBMHidLayer: public Layer {
public:
using Layer::ComputeFeature;
@@ -154,7 +155,7 @@ class RBMHidLayer: public Layer {
return data_;
else
return hid_sample_;
- }
+ }
const vector<Param*> GetParams() const override {
vector<Param*> params{weight_, bias_};
return params;
@@ -169,6 +170,8 @@ class RBMHidLayer: public Layer {
// batchsize of negative phase
int neg_batchsize_;
float scale_;
+ // whether use gaussian sampling
+ bool gaussian_;
Blob<float> hid_sample_;
Param* weight_, *bias_;
};
@@ -184,7 +187,6 @@ class InnerProductLayer: public Layer {
void ComputeFeature(Phase phase, Metric *perf) override;
void ComputeGradient(Phase phase) override;
-
ConnectionType src_neuron_connection(int k) const override {
// CHECK_LT(k, srclayers_.size());
return kOneToAll;
@@ -201,6 +203,7 @@ class InnerProductLayer: public Layer {
//! dimension of the visible layer
int vdim_;
int batchsize_;
+ bool transpose_;
Param* weight_, *bias_;
};
@@ -246,7 +249,9 @@ class MnistLayer: public ParserLayer {
void Setup(const LayerProto& proto, int npartitions) override;
void ParseRecords(Phase phase, const vector<Record>& records,
Blob<float>* blob) override;
-
+ ConnectionType dst_layer_connection() const override {
+ return kOneToMany;
+ }
protected:
// height and width of the image after deformation
// kernel size for elastic distortion
@@ -283,6 +288,29 @@ class ReLULayer: public Layer {
void ComputeGradient(Phase phase) override;
};
+class EuclideanLossLayer: public LossLayer {
+ public:
+ using Layer::ComputeFeature;
+ using Layer::ComputeGradient;
+
+ void Setup(const LayerProto& proto, int npartitions) override;
+ void ComputeFeature(Phase phase, Metric *perf) override;
+ void ComputeGradient(Phase phase) override;
+
+
+ int partition_dim() const override {
+ CHECK_LE(layer_proto_.partition_dim(), 1);
+ return layer_proto_.partition_dim();
+ }
+ ConnectionType src_neuron_connection(int k) const override {
+ // CHECK_LT(k, srclayers_.size());
+ return kOneToAll;
+ }
+
+ private:
+ int batchsize_;
+ int dim_;
+};
class SoftmaxLossLayer: public LossLayer {
/*
@@ -344,6 +372,21 @@ class ShardDataLayer: public DataLayer{
};
/**
+ * This layer apply Sigmoid function to neuron activations.
+ * f(x)=1/(1+exp(-x))
+ * f'(x)=f(x)*(1-f(x))
+ */
+class SigmoidLayer: public Layer {
+ public:
+ using Layer::ComputeFeature;
+ using Layer::ComputeGradient;
+
+ void Setup(const LayerProto& proto, int npartitions) override;
+ void ComputeFeature(Phase phase, Metric *perf) override;
+ void ComputeGradient(Phase phase) override;
+};
+
+/**
* This layer apply Tan function to neuron activations.
* f(x)=A tanh(Bx)
* f'(x)=B/A (A*A-f(x)*f(x))
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/include/trainer/worker.h
----------------------------------------------------------------------
diff --git a/include/trainer/worker.h b/include/trainer/worker.h
index cc5a745..86b1c90 100644
--- a/include/trainer/worker.h
+++ b/include/trainer/worker.h
@@ -193,10 +193,10 @@ class BPWorker: public Worker{
class CDWorker: public Worker{
public:
~CDWorker() {}
- void Init(int thread_id, int group_id, int worker_id) override;
+ void Init(int thread_id, int grp_id, int id) override;
void TrainOneBatch(int step, Metric* perf) override;
- void TestOneBatch(int step, Phase phase,
- shared_ptr<NeuralNet> net, Metric* perf) override;
+ void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,
+ Metric* perf) override;
void PositivePhase(int step, shared_ptr<NeuralNet> net, Metric* perf);
void NegativePhase(int step, shared_ptr<NeuralNet> net, Metric* perf);
void GradientPhase(int step, shared_ptr<NeuralNet> net);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 1bc712d..e5045a3 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -31,21 +31,23 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<ConvolutionLayer, int>(kConvolution);
RegisterLayer<ConcateLayer, int>(kConcate);
RegisterLayer<DropoutLayer, int>(kDropout);
+ RegisterLayer<EuclideanLossLayer, int>(kEuclideanLoss);
RegisterLayer<InnerProductLayer, int>(kInnerProduct);
RegisterLayer<LabelLayer, int>(kLabel);
RegisterLayer<LRNLayer, int>(kLRN);
RegisterLayer<MnistLayer, int>(kMnist);
RegisterLayer<PrefetchLayer, int>(kPrefetch);
RegisterLayer<PoolingLayer, int>(kPooling);
+ RegisterLayer<RBMHidLayer, int>(kRBMHid);
+ RegisterLayer<RBMVisLayer, int>(kRBMVis);
RegisterLayer<RGBImageLayer, int>(kRGBImage);
RegisterLayer<ReLULayer, int>(kReLU);
RegisterLayer<ShardDataLayer, int>(kShardData);
+ RegisterLayer<SigmoidLayer, int>(kSigmoid);
RegisterLayer<SliceLayer, int>(kSlice);
RegisterLayer<SoftmaxLossLayer, int>(kSoftmaxLoss);
RegisterLayer<SplitLayer, int>(kSplit);
RegisterLayer<TanhLayer, int>(kTanh);
- RegisterLayer<RBMVisLayer, int>(kRBMVis);
- RegisterLayer<RBMHidLayer, int>(kRBMHid);
#ifdef USE_LMDB
RegisterLayer<LMDBDataLayer, int>(kLMDBData);
#endif
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index ae45ae8..b5c986e 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -163,6 +163,7 @@ RBMVisLayer::~RBMVisLayer() {
delete weight_;
delete bias_;
}
+
void RBMVisLayer::Setup(const LayerProto& proto,
int npartitions) {
Layer::Setup(proto, npartitions);
@@ -188,7 +189,7 @@ void RBMVisLayer::Setup(const LayerProto& proto,
vis_sample_.Reshape(vector<int>{neg_batchsize_, vdim_});
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
- weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_});
+ weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
bias_->Setup(proto.param(1), vector<int>{vdim_});
}
@@ -199,24 +200,15 @@ void RBMVisLayer::ComputeFeature(Phase phase, Metric* perf) {
auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this));
Copy(data, src);
} else if (phase == kNegative) { /*negative phase*/
- if (is_first_iteration_vis_) {
- CHECK_EQ(srclayers_[data_idx_]->data(this).count(), batchsize_*vdim_);
- auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this));
- auto vis_sample = Tensor2(&vis_sample_);
- Copy(vis_sample, src);
- is_first_iteration_vis_ = false;
- } else {
- auto hid_sample =
- Tensor2(srclayers_[hid_idx_]->mutable_data(this, kNegative));
- // fetch sampling results from hidden layer
- auto vis_sample = Tensor2(&vis_sample_);
- auto weight = Tensor2(weight_->mutable_data());
- auto bias = Tensor1(bias_->mutable_data());
- vis_sample = dot(hid_sample, weight.T());
- vis_sample+=repmat(bias, neg_batchsize_);
- vis_sample = F<op::sigmoid>(vis_sample);
- TSingleton<Random<cpu>>::Instance()->SampleBinary(vis_sample);
- }
+ auto hid_sample =
+ Tensor2(srclayers_[hid_idx_]->mutable_data(this, kNegative));
+ // fetch sampling results from hidden layer
+ auto vis_sample = Tensor2(&vis_sample_);
+ auto weight = Tensor2(weight_->mutable_data());
+ auto bias = Tensor1(bias_->mutable_data());
+ vis_sample = dot(hid_sample, weight);
+ vis_sample+=repmat(bias, neg_batchsize_);
+ vis_sample = F<op::sigmoid>(vis_sample);
}
}
@@ -231,14 +223,14 @@ void RBMVisLayer::ComputeGradient(Phase phase) {
auto gbias = Tensor1(bias_->mutable_grad());
gbias = sum_rows(vis_sample);
gbias -= sum_rows(data);
- gweight = dot(vis_sample.T(), hid_sample);
- gweight -= dot(data.T(), hid_data);
+ gweight = dot(hid_sample.T(), vis_sample);
+ gweight -= dot(hid_data.T(), data);
gbias*=(1.0f)/(1.0f*batchsize_);
gweight*=(1.0f)/(1.0f*batchsize_);
}
void RBMVisLayer::ComputeLoss(Metric* perf) {
- float loss = (0.0f);
+ float loss_sqr = (0.0f);
CHECK_EQ(srclayers_[data_idx_]->data(this).count(), batchsize_*vdim_);
auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this));
auto hid_data = Tensor2(srclayers_[hid_idx_]->mutable_data(this, kPositive));
@@ -247,24 +239,26 @@ void RBMVisLayer::ComputeLoss(Metric* perf) {
auto bias = Tensor1(bias_->mutable_data());
Tensor<cpu, 2> reconstruct(Shape2(batchsize_, vdim_)); /*reconstruct error*/
AllocSpace(reconstruct);
- reconstruct = dot(hid_data, weight.T());
+ reconstruct = dot(hid_data, weight);
reconstruct+=repmat(bias, batchsize_);
reconstruct = F<op::sigmoid>(reconstruct);
float *src_dptr = src.dptr;
- float *reconstruct_dptr = reconstruct.dptr;
- for (int i = 0; i < vdim_*batchsize_; i++)
- loss += -(src_dptr[i]*log(reconstruct_dptr[i])
- +(1-src_dptr[i])*log(1-reconstruct_dptr[i]));
- loss/=batchsize_;
+ for (int i = 0; i < vdim_*batchsize_; i++) {
+ int recon_row = i / vdim_;
+ int recon_col = i - recon_row * vdim_;
+ loss_sqr += (src_dptr[i] - reconstruct[recon_row][recon_col]) *
+ (src_dptr[i] - reconstruct[recon_row][recon_col]);
+ }
FreeSpace(reconstruct);
perf->Reset();
- perf->Add("reconstruct_error", loss);
+ perf->Add("sqr_reconstruct_error", loss_sqr);
}
/**************** Implementation for RBMHidLayer********************/
RBMHidLayer::~RBMHidLayer() {
delete weight_;
delete bias_;
}
+
void RBMHidLayer::Setup(const LayerProto& proto,
int npartitions) {
Layer::Setup(proto, npartitions);
@@ -276,24 +270,44 @@ void RBMHidLayer::Setup(const LayerProto& proto,
neg_batchsize_ = src_sample.shape()[0];
vdim_ = src_data.count()/batchsize_;
hdim_ = proto.rbmhid_conf().hid_dim();
+ gaussian_ = proto.rbmhid_conf().gaussian();
data_.Reshape(vector<int>{batchsize_, hdim_});
hid_sample_.Reshape(vector<int>{neg_batchsize_, hdim_});
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
- weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_});
bias_->Setup(proto.param(1), vector<int>{hdim_});
+ weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
}
void RBMHidLayer::ComputeFeature(Phase phase, Metric* perf) {
if (phase == kPositive) { /*postive phase*/
auto data = Tensor2(&data_);
+
+ auto hid_sample = Tensor2(&hid_sample_);
+
CHECK_EQ(srclayers_[0]->data(this, kPositive).count(), batchsize_*vdim_);
auto src = Tensor2(srclayers_[0]->mutable_data(this, kPositive));
auto weight = Tensor2(weight_->mutable_data());
auto bias = Tensor1(bias_->mutable_data());
- data = dot(src, weight);
+ data = dot(src, weight.T());
data += repmat(bias, batchsize_);
- data = F<op::sigmoid>(data);
+
+ if (!gaussian_)
+ data = F<op::sigmoid>(data);
+
+ Copy(hid_sample, data);
+
+ if (gaussian_) { // first gibbs
+ Tensor<cpu, 2> gaussian_sample(Shape2(batchsize_, hdim_));
+ AllocSpace(gaussian_sample);
+ auto random = TSingleton<Random<cpu>>::Instance();
+ random->SampleGaussian(gaussian_sample, 0.0f, 1.0f);
+ hid_sample += gaussian_sample;
+ FreeSpace(gaussian_sample);
+ } else {
+ TSingleton<Random<cpu>>::Instance()->SampleBinary(hid_sample);
+ }
+
} else if (phase == kNegative) { /*negative phase*/
CHECK_EQ(srclayers_[0]->data(this, kNegative).count(),
neg_batchsize_*vdim_);
@@ -301,15 +315,25 @@ void RBMHidLayer::ComputeFeature(Phase phase, Metric* perf) {
auto hid_sample = Tensor2(&hid_sample_);
auto bias = Tensor1(bias_->mutable_data());
auto weight = Tensor2(weight_->mutable_data());
- hid_sample = dot(src_sample, weight);
+ hid_sample = dot(src_sample, weight.T());
hid_sample += repmat(bias, neg_batchsize_);
- hid_sample = F<op::sigmoid>(hid_sample);
- TSingleton<Random<cpu>>::Instance()->SampleBinary(hid_sample);
+ if (!gaussian_)
+ hid_sample = F<op::sigmoid>(hid_sample);
} else if (phase == kLoss) { /*test phase*/
- auto data = Tensor2(&data_); // data: sigmoid(Wv+b)
- TSingleton<Random<cpu>>::Instance()->SampleBinary(data);
+ auto data = Tensor2(&data_); // data: sigmoid(Wv+b)
+ if (gaussian_) {
+ Tensor<cpu, 2> gaussian_sample(Shape2(batchsize_, hdim_));
+ AllocSpace(gaussian_sample);
+ auto random = TSingleton<Random<cpu>>::Instance();
+ random->SampleGaussian(gaussian_sample, 0.0f, 1.0f);
+ data += gaussian_sample;
+ FreeSpace(gaussian_sample);
+ }
+ else
+ TSingleton<Random<cpu>>::Instance()->SampleBinary(data);
}
}
+
void RBMHidLayer::ComputeGradient(Phase phase) {
auto data = Tensor2(&data_);
auto hid_sample = Tensor2(&hid_sample_);
@@ -326,17 +350,21 @@ InnerProductLayer::~InnerProductLayer() {
void InnerProductLayer::Setup(const LayerProto& proto, int npartitions) {
Layer::Setup(proto, npartitions);
CHECK_EQ(srclayers_.size(), 1);
- const auto& src=srclayers_[0]->data(this);
- batchsize_=src.shape()[0];
- vdim_=src.count()/batchsize_;
- hdim_=proto.innerproduct_conf().num_output();
- if(partition_dim()>0)
+ const auto& src = srclayers_[0]->data(this);
+ batchsize_ = src.shape()[0];
+ vdim_ = src.count()/batchsize_;
+ hdim_ = proto.innerproduct_conf().num_output();
+ transpose_ = proto.innerproduct_conf().transpose();
+ if (partition_dim() > 0)
hdim_ /= npartitions;
data_.Reshape(vector<int>{batchsize_, hdim_});
grad_.ReshapeLike(data_);
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
- weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
+ if (transpose_)
+ weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_});
+ else
+ weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
bias_->Setup(proto.param(1), vector<int>{hdim_});
}
@@ -345,7 +373,10 @@ void InnerProductLayer::ComputeFeature(Phase phase, Metric* perf) {
auto src = Tensor2(srclayers_[0]->mutable_data(this));
auto weight = Tensor2(weight_->mutable_data());
auto bias = Tensor1(bias_->mutable_data());
- data=dot(src, weight.T());
+ if (transpose_)
+ data = dot(src, weight);
+ else
+ data = dot(src, weight.T());
// repmat: repeat bias vector into batchsize rows
data+=repmat(bias, batchsize_);
}
@@ -357,11 +388,17 @@ void InnerProductLayer::ComputeGradient(Phase phas) {
auto gweight = Tensor2(weight_->mutable_grad());
auto gbias = Tensor1(bias_->mutable_grad());
- gbias=sum_rows(grad);
- gweight=dot(grad.T(), src);
- if(srclayers_[0]->mutable_grad(this)!=nullptr){
+ gbias = sum_rows(grad);
+ if (transpose_)
+ gweight = dot(src.T(), grad);
+ else
+ gweight = dot(grad.T(), src);
+ if (srclayers_[0]->mutable_grad(this) != nullptr) {
auto gsrc = Tensor2(srclayers_[0]->mutable_grad(this));
- gsrc=dot(grad, weight);
+ if (transpose_)
+ gsrc = dot(grad, weight.T());
+ else
+ gsrc = dot(grad, weight);
}
}
/*****************************************************************************
@@ -703,6 +740,25 @@ ShardDataLayer::~ShardDataLayer() {
delete shard_;
shard_ = nullptr;
}
+/*******************Implementation of SigmoidLayer***************************/
+void SigmoidLayer::Setup(const LayerProto& proto, int npartitions) {
+ Layer::Setup(proto, npartitions);
+ data_.ReshapeLike(srclayers_[0]->data(this));
+ grad_.ReshapeLike(srclayers_[0]->grad(this));
+}
+
+void SigmoidLayer::ComputeFeature(Phase phase, Metric* perf) {
+ auto data = Tensor1(&data_);
+ auto src = Tensor1(srclayers_[0]->mutable_data(this));
+ data = F<op::sigmoid>(src);
+}
+
+void SigmoidLayer::ComputeGradient(Phase phase) {
+ auto data = Tensor1(&data_);
+ auto grad = Tensor1(&grad_);
+ auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
+ gsrc = F<op::sigmoid_grad>(data)*grad;
+}
/*******************Implementation of TanLayer***************************/
void TanhLayer::Setup(const LayerProto& proto, int npartitions){
Layer::Setup(proto, npartitions);
@@ -722,6 +778,45 @@ void TanhLayer::ComputeGradient(Phase phase) {
auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
gsrc=F<op::stanh_grad>(data)*grad;
}
+/********** * Implementation for EuclideanLossLayer*************************/
+void EuclideanLossLayer::Setup(const LayerProto& proto, int npartitions) {
+ LossLayer::Setup(proto, npartitions);
+ CHECK_EQ(srclayers_.size(), 2);
+ data_.Reshape(srclayers_[0]->data(this).shape());
+ batchsize_ = data_.shape()[0];
+ dim_ = data_.count()/batchsize_;
+ metric_.Reshape(vector<int>{1});
+}
+void EuclideanLossLayer::ComputeFeature(Phase phase, Metric* perf) {
+ const float* reconstruct_dptr = srclayers_[0]->data(this).cpu_data();
+ const float* input_dptr = srclayers_[1]->data(this).cpu_data();
+ float loss = 0;
+ for (int n = 0; n < batchsize_; n++) {
+ for (int j = 0; j < dim_; ++j) {
+ loss += (input_dptr[j] - reconstruct_dptr[j]) *
+ (input_dptr[j] - reconstruct_dptr[j]);
+ }
+ reconstruct_dptr+=dim_;
+ input_dptr+=dim_;
+ }
+ CHECK_EQ(reconstruct_dptr,
+ srclayers_[0]->data(this).cpu_data() + (batchsize_*dim_));
+ CHECK_EQ(input_dptr,
+ srclayers_[1]->data(this).cpu_data() + (batchsize_*dim_));
+ perf->Add("loss", loss/(1.0f*batchsize_));
+}
+void EuclideanLossLayer::ComputeGradient(Phase phase) {
+ const float* reconstruct_dptr = srclayers_[0]->data(this).cpu_data();
+ const float* input_dptr = srclayers_[1]->data(this).cpu_data();
+ Blob<float>* gsrcblob = srclayers_[0]->mutable_grad(this);
+ float* gsrcptr = gsrcblob->mutable_cpu_data();
+ for (int n = 0; n < batchsize_; n++) {
+ for (int j = 0; j < dim_; j++)
+ gsrcptr[n*dim_+j]= 2 * (reconstruct_dptr[n*dim_+j]-input_dptr[n*dim_+j]);
+ }
+ Tensor<cpu, 1> gsrc(gsrcptr, Shape1(gsrcblob->count()));
+ gsrc*=1.0f/(1.0f*batchsize_);
+}
/********** * Implementation for SoftmaxLossLayer*************************/
void SoftmaxLossLayer::Setup(const LayerProto& proto, int npartitions) {
LossLayer::Setup(proto, npartitions);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index b4abe68..7f030ac 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -166,6 +166,8 @@ message LayerProto {
optional ConcateProto concate_conf = 31;
// configuration for dropout layer
optional DropoutProto dropout_conf = 33;
+ // configuration for euclideanloss layer
+ optional EuclideanLossProto euclideanloss_conf = 50;
// configuration for inner product layer
optional InnerProductProto innerproduct_conf = 34;
// configuration for local response normalization layer
@@ -178,6 +180,10 @@ message LayerProto {
optional PoolingProto pooling_conf = 37;
// configuration for prefetch layer
optional PrefetchProto prefetch_conf = 44;
+ // configuration for rbmhid layer
+ optional RBMHidProto rbmhid_conf = 49;
+ // configuration for rbmvis layer
+ optional RBMVisProto rbmvis_conf = 48;
// configuration for rectified linear unit layer
optional ReLUProto relu_conf = 38;
// configuration for rgb image parser layer
@@ -192,10 +198,7 @@ message LayerProto {
optional SplitProto split_conf = 42;
// configuration for tanh layer
optional TanhProto tanh_conf = 43;
- // configuration for rbmvis layer
- optional RBMVisProto rbmvis_conf = 48;
- // configuration for rbmhid layer
- optional RBMHidProto rbmhid_conf = 49;
+
// overrides the partition dimension for neural net
optional int32 partition_dim = 60 [default = -1];
@@ -299,6 +302,9 @@ message TanhProto {
optional float inner_scale = 2 [default = 1.0];
}
+message EuclideanLossProto {
+}
+
message SoftmaxLossProto {
// computing accuracy against topk results
optional int32 topk = 1 [default = 1];
@@ -367,6 +373,7 @@ message RBMVisProto {
message RBMHidProto {
optional int32 hid_dim = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
+ optional bool gaussian = 3 [default = false]; // use gaussian sampling or not
}
// Message that stores parameters used by InnerProductLayer
@@ -375,6 +382,8 @@ message InnerProductProto {
required int32 num_output = 1;
// use bias vector or not
optional bool bias_term = 30 [default = true];
+ // transpose or not
+ optional bool transpose = 31 [default = false];
}
message LRNProto {
@@ -524,12 +533,14 @@ enum LayerType {
kLRN = 6;
kPooling = 8;
kReLU = 9;
- kRBMHid = 24;
kRBMVis = 23;
+ kRBMHid = 24;
+ kSigmoid = 26;
kTanh = 14;
// Loss layers
// - Compute objective loss
kSoftmaxLoss = 11;
+ kEuclideanLoss = 25;
// Other layers
// - Connect layers when neural net is partitioned
kBridgeDst = 16;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/trainer/worker.cc
----------------------------------------------------------------------
diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc
index e047367..f112b17 100644
--- a/src/trainer/worker.cc
+++ b/src/trainer/worker.cc
@@ -380,7 +380,6 @@ void BPWorker::TestOneBatch(int step, Phase phase,
shared_ptr<NeuralNet> net, Metric* perf) {
Forward(step, phase, net, perf);
}
-
/****************************CDWorker**********************************/
void CDWorker::Init(int thread_id, int group_id, int worker_id) {
Worker::Init(thread_id, group_id, worker_id);
@@ -389,8 +388,11 @@ void CDWorker::Init(int thread_id, int group_id, int worker_id) {
void CDWorker::PositivePhase(int step,
shared_ptr<NeuralNet> net, Metric* perf) {
auto& layers = net->layers();
+ // LOG(ERROR)<<"Positive Phase";
for (auto& layer : layers) {
- // clock_t s=clock();
+ for (Param* p : layer->GetParams()) { // wait until param is updated
+ Collect(p, step);
+ }
layer->ComputeFeature(kPositive, perf);
}
}
@@ -399,33 +401,39 @@ void CDWorker::NegativePhase(int step,
shared_ptr<NeuralNet> net, Metric* perf) {
// for negative phase, gibbs sampling only concerns RBM bottom and top layer
auto& layers = net->layers();
- for (int i = 0; i < job_conf_.cd_conf().pcd_k(); i++) {
+ // LOG(ERROR)<<"Negative Phase";
for (auto& layer : layers) {
- if (layer->is_vislayer() || layer->is_hidlayer())
+ if (layer->is_vislayer() || layer->is_hidlayer()) {
layer->ComputeFeature(kNegative, perf);
+ }
}
- }
}
void CDWorker::GradientPhase(int step, shared_ptr<NeuralNet> net) {
auto& layers = net->layers();
+ // LOG(ERROR)<<"Gradient Phase";
for (auto& layer : layers) {
+ if (layer->is_vislayer() || layer->is_hidlayer()) {
layer->ComputeGradient(kTrain);
for (Param* p : layer->GetParams()) {
Update(p, step);
}
+ }
}
}
void CDWorker::LossPhase(int step, shared_ptr<NeuralNet> net, Metric* perf) {
auto& layers = net->layers();
+ // LOG(ERROR)<<"Loss Phase";
for (auto& layer : layers) {
- if (layer->is_hidlayer())
+ if (layer->is_hidlayer()) {
layer->ComputeFeature(kLoss, perf);
+ }
}
for (auto& layer : layers) {
- if (layer->is_vislayer())
+ if (layer->is_vislayer()) {
layer->ComputeLoss(perf);
+ }
}
}
[2/2] incubator-singa git commit: SINGA-9 Add Support for Restricted Boltzmann Machine (RBM) model
Posted by wa...@apache.org.
SINGA-9 Add Support for Restricted Boltzmann Machine (RBM) model
* Refactor CDWorker::TrainOneBatch.
* Replace Phase with an int flag in the ComputeFeature and ComputeGradient
  functions. The flag can be a combination of multiple phases, e.g.,
  kTrain|kForward, where each phase is set to have only one non-zero bit,
  e.g., 1, 2, 4, 8, etc. (see the sketch after this list).
* Remove the compilation dependency on OpenCV.
* Refactor JobProto to create an AlgProto for TrainOneBatch.
* Create an RBMLayer as the base layer for RBM layers.
* Update the configurations of all examples.
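The bit-flag change mentioned above follows the usual bit-mask idiom: each phase owns one bit, so several phases can be OR-ed into a single int and tested independently. The sketch below is illustrative only; the enum values are placeholders, not the exact constants defined in src/proto/job.proto.

  // Illustrative sketch of combinable phase flags (placeholder values).
  enum Phase {
    kTrain    = 1 << 0,
    kTest     = 1 << 1,
    kForward  = 1 << 2,
    kBackward = 1 << 3
  };

  void ComputeFeatureExample(int flag) {
    // a call such as ComputeFeature(kTrain | kForward, perf) sets both bits
    if ((flag & kTrain) && (flag & kForward)) {
      // training-time forward pass
    }
  }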
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/fbbcaafd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/fbbcaafd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/fbbcaafd
Branch: refs/heads/master
Commit: fbbcaafdba3e885eab44c0dcfd23829c2c80f732
Parents: ef4de79
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 27 21:50:08 2015 +0800
Committer: wangwei <wa...@comp.nus.edu.sg>
Committed: Fri Aug 28 17:18:38 2015 +0800
----------------------------------------------------------------------
Makefile.am | 3 -
configure.ac | 18 +-
examples/cifar10/job.conf | 5 +-
examples/mnist/conv.conf | 4 +-
examples/mnist/job.conf | 28 ++-
examples/rbm/autoencoder.conf | 89 ++--------
examples/rbm/rbm0.conf | 41 ++---
examples/rbm/rbm1.conf | 85 ++++-----
examples/rbm/rbm2.conf | 57 ++----
examples/rbm/rbm3.conf | 68 ++------
include/mshadow/tensor_random.h | 17 +-
include/neuralnet/base_layer.h | 91 ++++++----
include/neuralnet/layer.h | 146 ++++------------
include/trainer/worker.h | 6 -
include/utils/param.h | 3 +-
src/driver.cc | 8 +-
src/neuralnet/base_layer.cc | 49 +++---
src/neuralnet/layer.cc | 324 ++++++++++++++++-------------------
src/proto/job.proto | 47 +++--
src/trainer/worker.cc | 96 ++++-------
src/utils/common.cc | 2 +-
src/utils/param.cc | 10 +-
22 files changed, 497 insertions(+), 700 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index fa28848..ae8a9dd 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -95,9 +95,6 @@ singa_LDFLAGS = -I./include \
-lglog \
-lprotobuf \
-lrt \
- -lopencv_highgui \
- -lopencv_imgproc \
- -lopencv_core \
-lopenblas \
-lzmq \
-lczmq \
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index 35c6d61..569c406 100644
--- a/configure.ac
+++ b/configure.ac
@@ -44,15 +44,15 @@ if test x"$enable_lmdb" = x"yes"; then
AC_DEFINE(LMDB, 1, [Enable Option layer])
fi
-AC_CHECK_LIB([opencv_imgproc], [main], [], [
- AC_MSG_ERROR([unable to find opencv_imgproc lib])
- ])
-AC_CHECK_LIB([opencv_highgui], [main], [], [
- AC_MSG_ERROR([unable to find opencv_highgui lib])
- ])
-AC_CHECK_LIB([opencv_core], [main], [], [
- AC_MSG_ERROR([unable to find opencv_core lib])
- ])
+#AC_CHECK_LIB([opencv_imgproc], [main], [], [
+# AC_MSG_ERROR([unable to find opencv_imgproc lib])
+# ])
+#AC_CHECK_LIB([opencv_highgui], [main], [], [
+# AC_MSG_ERROR([unable to find opencv_highgui lib])
+# ])
+#AC_CHECK_LIB([opencv_core], [main], [], [
+# AC_MSG_ERROR([unable to find opencv_core lib])
+# ])
AC_CHECK_LIB([zookeeper_mt], [main], [], [
AC_MSG_ERROR([unable to find zookeeper])
])
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/cifar10/job.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/job.conf b/examples/cifar10/job.conf
index f44ca50..9d25904 100644
--- a/examples/cifar10/job.conf
+++ b/examples/cifar10/job.conf
@@ -3,7 +3,10 @@ train_steps: 1000
test_steps: 100
test_freq:300
disp_freq:30
-alg: kBP
+debug: true
+train_one_batch {
+ alg: kBP
+}
updater{
type: kSGD
weight_decay:0.004
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/mnist/conv.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/conv.conf b/examples/mnist/conv.conf
index 1d4d740..aaf34f2 100644
--- a/examples/mnist/conv.conf
+++ b/examples/mnist/conv.conf
@@ -3,7 +3,9 @@ train_steps: 10000
test_steps:100
test_freq:500
disp_freq:50
-alg: kBP
+train_one_batch {
+ alg: kBP
+}
updater {
momentum:0.9
weight_decay:0.0005
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/mnist/job.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/job.conf b/examples/mnist/job.conf
index 360e1ec..b8d14e8 100644
--- a/examples/mnist/job.conf
+++ b/examples/mnist/job.conf
@@ -3,7 +3,9 @@ train_steps: 1000
test_steps:10
test_freq:60
disp_freq:10
-alg: kBP
+train_one_batch {
+ alg: kBP
+}
updater{
type: kSGD
learning_rate{
@@ -82,6 +84,10 @@ neuralnet {
layer{
name: "tanh1"
type: kTanh
+ tanh_conf {
+ outer_scale: 1.7159047
+ inner_scale: 0.6666667
+ }
srclayers:"fc1"
}
layer{
@@ -112,6 +118,11 @@ neuralnet {
layer{
name: "tanh2"
type: kTanh
+ tanh_conf {
+ outer_scale: 1.7159047
+ inner_scale: 0.6666667
+ }
+
srclayers:"fc2"
}
layer{
@@ -143,6 +154,11 @@ neuralnet {
layer{
name: "tanh3"
type: kTanh
+ tanh_conf {
+ outer_scale: 1.7159047
+ inner_scale: 0.6666667
+ }
+
srclayers:"fc3"
}
layer{
@@ -174,6 +190,11 @@ neuralnet {
layer{
name: "tanh4"
type: kTanh
+ tanh_conf {
+ outer_scale: 1.7159047
+ inner_scale: 0.6666667
+ }
+
srclayers:"fc4"
}
layer{
@@ -205,6 +226,11 @@ neuralnet {
layer{
name: "tanh5"
type: kTanh
+ tanh_conf {
+ outer_scale: 1.7159047
+ inner_scale: 0.6666667
+ }
+
srclayers:"fc5"
}
layer{
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/rbm/autoencoder.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/autoencoder.conf b/examples/rbm/autoencoder.conf
index 9575323..bc32cc7 100644
--- a/examples/rbm/autoencoder.conf
+++ b/examples/rbm/autoencoder.conf
@@ -1,15 +1,15 @@
-name: "deep-big-simple-mlp"
+name: "auto-encoder"
train_steps: 12200
test_steps:100
-test_freq:100
-disp_freq:20
-checkpoint_after: 1000
-checkpoint_freq: 1000
-checkpoint_path: "examples/rbm/checkpoint/rbm0/checkpoint/step6000-worker0.bin"
-checkpoint_path: "examples/rbm/checkpoint/rbm1/checkpoint/step6000-worker0.bin"
-checkpoint_path: "examples/rbm/checkpoint/rbm2/checkpoint/step6000-worker0.bin"
-checkpoint_path: "examples/rbm/checkpoint/rbm3/checkpoint/step6000-worker0.bin"
-alg: kBP
+test_freq:1000
+disp_freq:100
+checkpoint_path: "examples/rbm/rbm0/checkpoint/step6000-worker0.bin"
+checkpoint_path: "examples/rbm/rbm1/checkpoint/step6000-worker0.bin"
+checkpoint_path: "examples/rbm/rbm2/checkpoint/step6000-worker0.bin"
+checkpoint_path: "examples/rbm/rbm3/checkpoint/step6000-worker0.bin"
+train_one_batch{
+ alg: kBP
+}
updater{
type: kAdaGrad
learning_rate{
@@ -23,7 +23,7 @@ neuralnet {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_train_shard"
+ path: "examples/mnist/mnist_train_shard"
batchsize: 1000
}
exclude: kTest
@@ -33,7 +33,7 @@ neuralnet {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_test_shard"
+ path: "examples/mnist/mnist_test_shard"
batchsize: 1000
}
exclude: kTrain
@@ -64,19 +64,9 @@ neuralnet {
}
param{
name: "w1"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb12"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -94,19 +84,9 @@ neuralnet {
}
param{
name: "w2"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb22"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -125,19 +105,9 @@ neuralnet {
}
param{
name: "w3"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb32"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -156,19 +126,10 @@ neuralnet {
}
param{
name: "w4"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb42"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
+
}
}
@@ -187,11 +148,6 @@ neuralnet {
}
param{
name: "rb41"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -214,13 +170,7 @@ neuralnet {
}
param{
name: "rb31"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
-
}
layer{
@@ -242,11 +192,6 @@ neuralnet {
}
param{
name: "rb21"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -270,13 +215,7 @@ neuralnet {
}
param{
name: "rb11"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
-
}
layer{
@@ -295,5 +234,5 @@ neuralnet {
cluster {
nworker_groups: 1
nserver_groups: 1
- workspace: "examples/rbm/checkpoint/autoencoder/"
+ workspace: "examples/rbm/autoencoder/"
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/rbm/rbm0.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm0.conf b/examples/rbm/rbm0.conf
index ef8653f..a7e503b 100644
--- a/examples/rbm/rbm0.conf
+++ b/examples/rbm/rbm0.conf
@@ -1,11 +1,11 @@
-name: "deep-big-simple-dbm"
+name: "rbm0"
train_steps: 6000
test_steps:100
test_freq:100
disp_freq: 100
-alg: kCD
-checkpoint_after: 500
-checkpoint_freq: 1000
+train_one_batch{
+ alg: kCD
+}
updater{
type: kSGD
momentum: 0.9
@@ -21,7 +21,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_train_shard"
+ path: "examples/mnist/mnist_train_shard"
batchsize: 100
}
exclude: kTest
@@ -32,7 +32,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_test_shard"
+ path: "examples/mnist/mnist_test_shard"
batchsize: 100
}
exclude: kTrain
@@ -54,22 +54,15 @@ layer{
type: kRBMVis
srclayers:"mnist"
srclayers:"RBMHid"
- rbmvis_conf{
- num_output: 1000
- }
param{
- name: "w1"
- init{
- type: kGaussian
- mean: 0.0
- std: 0.1
- }
+ name: "w1_"
+ share_from: "w1"
}
param{
name: "rb11"
init{
- type: kConstant
- value: 0.0
+ type: kConstant
+ value: 0.0
}
}
}
@@ -82,14 +75,18 @@ layer{
hid_dim: 1000
}
param{
- name: "w1_1"
- share_from: "w1"
+ name: "w1"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
}
param{
name: "rb12"
init{
- type: kConstant
- value: 0.0
+ type: kConstant
+ value: 0.0
}
}
}
@@ -99,5 +96,5 @@ cluster {
nserver_groups: 1
nservers_per_group: 1
nworkers_per_group: 1
- workspace: "examples/rbm/checkpoint/rbm0/"
+ workspace: "examples/rbm/rbm0/"
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/rbm/rbm1.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm1.conf b/examples/rbm/rbm1.conf
index f9b4974..db27d3a 100644
--- a/examples/rbm/rbm1.conf
+++ b/examples/rbm/rbm1.conf
@@ -1,12 +1,12 @@
-name: "deep-big-simple-dbm"
+name: "rbm1"
train_steps: 6000
test_steps:100
-test_freq:500
+test_freq:1000
disp_freq: 100
-alg: kCD
-checkpoint_after: 500
-checkpoint_freq: 1000
-checkpoint_path: "examples/rbm/checkpoint/rbm0/checkpoint/step6000-worker0.bin"
+train_one_batch{
+ alg: kCD
+}
+checkpoint_path: "examples/rbm/rbm0/checkpoint/step6000-worker0.bin"
updater{
type: kSGD
momentum: 0.9
@@ -22,7 +22,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_train_shard"
+ path: "examples/mnist/mnist_train_shard"
batchsize: 100
}
exclude: kTest
@@ -33,7 +33,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_test_shard"
+ path: "examples/mnist/mnist_test_shard"
batchsize: 100
}
exclude: kTrain
@@ -51,51 +51,34 @@ layer{
}
layer{
- name: "fc1"
- type: kInnerProduct
- srclayers:"mnist"
- innerproduct_conf{
- num_output: 1000
- }
- param{
- name: "w1"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
- }
- param{
- name: "rb12"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
- }
+ name: "fc1"
+ type: kInnerProduct
+ srclayers:"mnist"
+ innerproduct_conf{
+ num_output: 1000
}
-
- layer{
- name: "sigmoid1"
- type: kSigmoid
- srclayers:"fc1"
+ param{
+ name: "w1"
}
+ param{
+ name: "rb12"
+ }
+}
+
+layer{
+ name: "sigmoid1"
+ type: kSigmoid
+ srclayers:"fc1"
+}
layer{
name: "RBMVis"
type: kRBMVis
srclayers:"sigmoid1"
srclayers:"RBMHid"
- rbmvis_conf{
- num_output: 500
- }
param{
- name: "w2"
- init{
- type: kGaussian
- mean: 0.0
- std: 0.1
- }
+ name: "w2_"
+ share_from: "w2"
}
param{
name: "rb21"
@@ -114,14 +97,18 @@ layer{
hid_dim: 500
}
param{
- name: "w2_1"
- share_from: "w2"
+ name: "w2"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
}
param{
name: "rb22"
init{
- type: kConstant
- value: 0.0
+ type: kConstant
+ value: 0.0
}
}
}
@@ -131,5 +118,5 @@ cluster {
nserver_groups: 1
nservers_per_group: 1
nworkers_per_group: 1
- workspace: "examples/rbm/checkpoint/rbm1/"
+ workspace: "examples/rbm/rbm1/"
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/rbm/rbm2.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm2.conf b/examples/rbm/rbm2.conf
index 6629481..fd08907 100644
--- a/examples/rbm/rbm2.conf
+++ b/examples/rbm/rbm2.conf
@@ -1,12 +1,12 @@
-name: "deep-big-simple-dbm"
+name: "rbm2"
train_steps: 6000
test_steps:100
-test_freq:100
+test_freq:1000
disp_freq: 100
-alg: kCD
-checkpoint_after: 500
-checkpoint_freq: 1000
-checkpoint_path: "examples/rbm/checkpoint/rbm1/checkpoint/step6000-worker0.bin"
+train_one_batch{
+ alg: kCD
+}
+checkpoint_path: "examples/rbm/rbm1/checkpoint/step6000-worker0.bin"
updater{
type: kSGD
@@ -24,7 +24,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_train_shard"
+ path: "examples/mnist/mnist_train_shard"
batchsize: 100
}
exclude: kTest
@@ -35,7 +35,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_test_shard"
+ path: "examples/mnist/mnist_test_shard"
batchsize: 100
}
exclude: kTrain
@@ -61,19 +61,9 @@ layer{
}
param{
name: "w1"
- init {
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb12"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -92,19 +82,9 @@ layer{
}
param{
name: "w2"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb22"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -118,16 +98,9 @@ layer{
type: kRBMVis
srclayers:"sigmoid2"
srclayers:"RBMHid"
- rbmvis_conf{
- num_output: 250
- }
param{
- name: "w3"
- init{
- type: kGaussian
- mean: 0.0
- std: 0.1
- }
+ name: "w3_"
+ share_from: "w3"
}
param{
name: "rb31"
@@ -146,8 +119,12 @@ layer{
hid_dim: 250
}
param{
- name: "w3_1"
- share_from: "w3"
+ name: "w3"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
}
param{
name: "rb32"
@@ -163,5 +140,5 @@ cluster {
nserver_groups: 1
nservers_per_group: 1
nworkers_per_group: 1
- workspace: "examples/rbm/checkpoint/rbm2/"
+ workspace: "examples/rbm/rbm2/"
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/examples/rbm/rbm3.conf
----------------------------------------------------------------------
diff --git a/examples/rbm/rbm3.conf b/examples/rbm/rbm3.conf
index 482c5e7..fe7cc1f 100644
--- a/examples/rbm/rbm3.conf
+++ b/examples/rbm/rbm3.conf
@@ -1,12 +1,12 @@
-name: "deep-big-simple-dbm"
+name: "rbm3"
train_steps: 6000
test_steps: 100
-test_freq: 100
+test_freq: 1000
disp_freq: 100
-alg: kCD
-checkpoint_after: 500
-checkpoint_freq: 1000
-checkpoint_path: "examples/rbm/checkpoint/rbm2/checkpoint/step6000-worker0.bin"
+train_one_batch{
+ alg: kCD
+}
+checkpoint_path: "examples/rbm/rbm2/checkpoint/step6000-worker0.bin"
updater{
type: kSGD
momentum: 0.9
@@ -22,7 +22,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_train_shard"
+ path: "examples/mnist/mnist_train_shard"
batchsize: 100
}
exclude: kTest
@@ -33,7 +33,7 @@ layer {
name: "data"
type: kShardData
sharddata_conf {
- path: "examples/rbm/mnist_test_shard"
+ path: "examples/mnist/mnist_test_shard"
batchsize: 100
}
exclude: kTrain
@@ -59,19 +59,9 @@ layer{
}
param{
name: "w1"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb12"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -90,19 +80,9 @@ layer{
}
param{
name: "w2"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb22"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -121,19 +101,9 @@ layer{
}
param{
name: "w3"
- init{
- type: kUniform
- low:-0.05
- high:0.05
- }
}
param{
name: "rb32"
- init{
- type: kUniform
- low: -0.05
- high:0.05
- }
}
}
@@ -148,16 +118,10 @@ layer{
type: kRBMVis
srclayers:"sigmoid3"
srclayers:"RBMHid"
- rbmvis_conf{
- num_output: 30
- }
param{
- name: "w4"
- init{
- type: kGaussian
- mean: 0.0
- std: 0.1
- }
+ name: "w4_"
+ share_from: "w4"
+
}
param{
name: "rb41"
@@ -177,8 +141,12 @@ layer{
gaussian: true
}
param{
- name: "w4_1"
- share_from: "w4"
+ name: "w4"
+ init{
+ type: kGaussian
+ mean: 0.0
+ std: 0.1
+ }
}
param{
name: "rb42"
@@ -194,5 +162,5 @@ cluster {
nserver_groups: 1
nservers_per_group: 1
nworkers_per_group: 1
- workspace: "examples/rbm/checkpoint/rbm3/"
+ workspace: "examples/rbm/rbm3/"
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/include/mshadow/tensor_random.h
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_random.h b/include/mshadow/tensor_random.h
index 72164a8..59ef082 100644
--- a/include/mshadow/tensor_random.h
+++ b/include/mshadow/tensor_random.h
@@ -68,20 +68,27 @@ namespace mshadow {
gen_.seed(seed);
#endif
}
+ template<int dim>
+ inline void SampleBinary(Tensor<cpu, dim> &src) {
+ SampleBinary(src, src);
+ }
+
/*!
* \brief generate binary data according to a probability matrix
+ * \param src source
* \param dst destination
* \param a lower bound of uniform
* \param b upper bound of uniform
* \tparam dim dimension of tensor
*/
template<int dim>
- inline void SampleBinary( Tensor<cpu, dim> &dst) {
+ inline void SampleBinary(Tensor<cpu, dim> &dst, Tensor<cpu, dim> &src) {
real_t a=0.0f;
real_t b=1.0f;
- Tensor<cpu, 2> mat = dst.FlatTo2D();
+ Tensor<cpu, 2> dmat = dst.FlatTo2D();
+ Tensor<cpu, 2> smat = src.FlatTo2D();
std::uniform_real_distribution<real_t> distribution (a,b);
- for ( index_t i = 0; i < mat.shape[1]; ++i ) {
+ for ( index_t i = 0; i < dmat.shape[1]; ++i ) {
#if MSHADOW_USE_MKL
#if MSHADOW_SINGLE_PRECISION
int status = vsRngUniform( 0, vStream_, mat.shape[0], mat[i].dptr, a, b );
@@ -96,8 +103,8 @@ namespace mshadow {
mat[i][j] = this->RandNext()*(b-a) + a;
}
*/
- for ( index_t j = 0; j < mat.shape[0]; ++j ) {
- mat[i][j] = distribution(gen_) > mat[i][j] ? 0.0f: 1.0f;
+ for ( index_t j = 0; j < dmat.shape[0]; ++j ) {
+ dmat[i][j] = distribution(gen_) > smat[i][j] ? 0.0f: 1.0f;
}
#endif
}
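
The non-MKL branch above draws one uniform number per element and thresholds it
against the corresponding source probability, so every output element is an
independent Bernoulli sample. A minimal standalone sketch of the same rule using
plain C++11 <random> instead of mshadow (the function and variable names here are
illustrative only):

#include <random>
#include <vector>

// out[i] is 1 with probability prob[i], else 0 -- the same rule as the
// dmat/smat loop in SampleBinary above.
std::vector<float> SampleBernoulli(const std::vector<float>& prob,
                                   std::mt19937& gen) {
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);
  std::vector<float> out(prob.size());
  for (size_t i = 0; i < prob.size(); ++i)
    out[i] = uniform(gen) > prob[i] ? 0.0f : 1.0f;
  return out;
}

The new one-argument overload simply samples a tensor in place by passing it as
both source and destination.
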
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/include/neuralnet/base_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/base_layer.h b/include/neuralnet/base_layer.h
index 5575fc7..9aa207d 100644
--- a/include/neuralnet/base_layer.h
+++ b/include/neuralnet/base_layer.h
@@ -49,25 +49,25 @@ class Layer {
/**
* Compute features of this layer based on connected layers.
*
- * @param phase kTrain, kTest, kPositive, etc.
+ * @param flag kTrain, kTest, kPositive, etc.
*/
- virtual void ComputeFeature(Phase phase, Metric* perf) = 0;
+ virtual void ComputeFeature(int flag, Metric* perf) = 0;
/**
* Compute gradients for parameters and connected layers.
*
- * @param phase kTrain, kTest, kPositive, etc.
+ * @param flag kTrain, kTest, kPositive, etc.
*/
virtual void ComputeLoss(Metric* perf) {}
- virtual void ComputeGradient(Phase phase) = 0;
+ virtual void ComputeGradient(int flag) = 0;
/**
* For print debug info about each layer, e.g., norm of feature vector,
* norm of parameters.
*
* @param step training/test/validation step
- * @param phase forward/backward/positive/negative...
+ * @param flag forward/backward/positive/negative...
* @return debug info about this layer.
*/
- const string DebugString(int step, Phase phase);
+ const string DebugString(int step, int flag);
/**
* Layers that have paramters must override this function.
*
@@ -141,10 +141,10 @@ class Layer {
/**
* @return a const ref for Blob storing neuron values of this layer for BP
*/
- virtual const Blob<float>& data(const Layer* from, Phase = kPositive) const {
+ virtual const Blob<float>& data(const Layer* from) const {
return data_;
}
- virtual Blob<float>* mutable_data(const Layer* from, Phase = kPositive) {
+ virtual Blob<float>* mutable_data(const Layer* from) {
return &data_;
}
@@ -246,15 +246,15 @@ class BridgeSrcLayer: public BridgeLayer {
using Layer::ComputeFeature;
using Layer::ComputeGradient;
- void ComputeFeature(Phase phase, Metric* perf) override {}
- void ComputeGradient(Phase phase) override {
+ void ComputeFeature(int flag, Metric* perf) override {}
+ void ComputeGradient(int flag) override {
ready_ = false;
}
- const Blob<float>& data(const Layer* from, Phase phase) const override {
+ const Blob<float>& data(const Layer* from) const override {
return srclayers_[0]->data(this);
}
- Blob<float>* mutable_data(const Layer* from, Phase phase) override {
+ Blob<float>* mutable_data(const Layer* from) override {
return srclayers_[0]->mutable_data(this);
}
const Blob<float>& grad(const Layer* from) const override {
@@ -278,11 +278,11 @@ class BridgeDstLayer: public BridgeLayer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric* perf) override {
+ void ComputeFeature(int flag, Metric* perf) override {
// reset ready_ for next iteration.
ready_ = false;
}
- void ComputeGradient(Phase phase) override {}
+ void ComputeGradient(int flag) override {}
bool is_bridgedstlayer() const {
return true;
}
@@ -297,8 +297,8 @@ class ConcateLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric* perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric* perf) override;
+ void ComputeGradient(int flag) override;
};
/**
@@ -311,11 +311,11 @@ class DataLayer: public Layer{
using Layer::mutable_grad;
using Layer::dst_layer_connection;
- void ComputeGradient(Phase phase) override {}
+ void ComputeGradient(int flag) override {}
bool is_datalayer() const override {
return true;
}
- Blob<float>* mutable_data(const Layer* layer, Phase phase) override {
+ Blob<float>* mutable_data(const Layer* layer) override {
return nullptr;
}
Blob<float>* mutable_grad(const Layer* layer) override {
@@ -357,11 +357,11 @@ class PrefetchLayer : public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric* perf) override;
- void ComputeGradient(Phase phase) override {};
+ void ComputeFeature(int flag, Metric* perf) override;
+ void ComputeGradient(int flag) override {};
- const Blob<float>& data(const Layer* from, Phase phase) const override;
- Blob<float>* mutable_data(const Layer* layer, Phase phase) override;
+ const Blob<float>& data(const Layer* from) const override;
+ Blob<float>* mutable_data(const Layer* layer) override;
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
@@ -371,7 +371,7 @@ class PrefetchLayer : public Layer {
return grad_;
}
- void Prefetch(Phase phase);
+ void Prefetch(int flag);
virtual ~PrefetchLayer();
protected:
@@ -389,14 +389,14 @@ class SliceLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric* perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric* perf) override;
+ void ComputeGradient(int flag) override;
ConnectionType dst_layer_connection() const override {
return kOneToMany;
}
- const Blob<float>& data(const Layer* layer, Phase phase) const override;
+ const Blob<float>& data(const Layer* layer) const override;
const Blob<float>& grad(const Layer* layer) const override;
- Blob<float>* mutable_data(const Layer* layer, Phase phase) override;
+ Blob<float>* mutable_data(const Layer* layer) override;
Blob<float>* mutable_grad(const Layer* layer) override;
protected:
@@ -418,8 +418,8 @@ class SplitLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric* perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric* perf) override;
+ void ComputeGradient(int flag) override;
ConnectionType dst_layer_connection() const override {
return kOneToMany;
}
@@ -462,12 +462,12 @@ class ParserLayer: public Layer {
using Layer::mutable_grad;
using Layer::grad;
- void ComputeFeature(Phase phase, Metric* perf) override;
- void ComputeGradient(Phase phase) override {};
+ void ComputeFeature(int flag, Metric* perf) override;
+ void ComputeGradient(int flag) override {};
/**
* Parse records from DataLayer into blob.
*/
- virtual void ParseRecords(Phase phase, const vector<Record>& records,
+ virtual void ParseRecords(int flag, const vector<Record>& records,
Blob<float>* blob) = 0;
bool is_parserlayer() const override {
return true;
@@ -480,6 +480,33 @@ class ParserLayer: public Layer {
return grad_;
}
};
+
+class RBMLayer: public Layer {
+ public:
+ const Blob<float>& neg_data(const Layer* layer) {
+ return neg_data_;
+ }
+ Blob<float>* mutable_neg_data(const Layer* layer) {
+ return &neg_data_;
+ }
+ const vector<Param*> GetParams() const override {
+ vector<Param*> params{weight_, bias_};
+ return params;
+ }
+  virtual Blob<float>* Sample(int flag) = 0;
+
+ protected:
+ //! dimension of the hidden layer
+ int hdim_;
+ //! dimension of the visible layer
+ int vdim_;
+ int batchsize_;
+ Param* weight_, *bias_;
+
+ Blob<float> neg_data_;
+ Blob<float> neg_sample_;
+ Blob<float> sample_;
+};
} // namespace singa
#endif // SINGA_NEURALNET_BASE_LAYER_H_
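
The new RBMLayer base class gives every RBM layer a positive-phase feature blob
(data_) and a negative-phase blob (neg_data_), each paired with a sampled
counterpart, plus the weight/bias Params returned by GetParams(). A hypothetical
subclass skeleton, only to show the contract that concrete layers fill in (the
real implementations are RBMVisLayer and RBMHidLayer below; the class name,
include path and comments here are illustrative, not part of the patch):

#include "neuralnet/base_layer.h"

class ToyRBMLayer : public singa::RBMLayer {
 public:
  void Setup(const singa::LayerProto& proto, int npartitions) override {
    singa::Layer::Setup(proto, npartitions);
    // reshape data_/neg_data_/sample_/neg_sample_ and create weight_, bias_
  }
  void ComputeFeature(int flag, singa::Metric* perf) override {
    // fill data_ when (flag & kPositive), neg_data_ when (flag & kNegative)
  }
  void ComputeGradient(int flag) override {
    // CD gradient computed from the positive and negative blobs
  }
  singa::Blob<float>* Sample(int flag) override {
    // hand back the sampled blob matching the phase bit in flag
    return (flag & singa::kPositive) ? &sample_ : &neg_sample_;
  }
};
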
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index b1fbbb0..435d854 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -31,8 +31,8 @@ class ConvolutionLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
const vector<Param*> GetParams() const override {
vector<Param*> params{weight_, bias_};
return params;
@@ -57,8 +57,8 @@ class DropoutLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
protected:
// drop probability
@@ -68,112 +68,42 @@ class DropoutLayer: public Layer {
*/
Blob<float> mask_;
};
+
/**
* RBM visible layer
*/
-class RBMVisLayer: public Layer {
+class RBMVisLayer: public RBMLayer {
public:
using Layer::ComputeFeature;
using Layer::ComputeGradient;
- void Setup(const LayerProto& proto,
- int npartitions) override;
- virtual bool is_vislayer() const {
- return true;
- }
-
- void ComputeFeature(Phase phase,
- Metric *perf) override;
- void ComputeGradient(Phase phase) override;
- virtual void ComputeLoss(Metric* perf);
- virtual Blob<float>* mutable_data(const Layer* from, Phase phase) {
- if (phase == kPositive) {
- return &data_;
- } else {
- return &vis_sample_;
- }
- }
- virtual const Blob<float>& data(const Layer* from, Phase phase) const {
- if (phase == kPositive) {
- return data_;
- } else {
- return vis_sample_;
- }
- }
- // virtual void ToProto(LayerProto *layer_proto, bool copyData);
- const vector<Param*> GetParams() const override {
- vector<Param*> params{weight_, bias_};
- return params;
- }
~RBMVisLayer();
-
-
+ void Setup(const LayerProto& proto, int npartitions) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
+  Blob<float>* Sample(int flag) override;
private:
- //! dimension of the hidden layer
- int hdim_;
- //! dimension of the visible layer
- int vdim_;
- int batchsize_;
- // batchsize of negative phase
- int neg_batchsize_;
- bool is_first_iteration_vis_;
- float scale_;
- // srclayer index
- int data_idx_;
- int hid_idx_;
- Param* weight_, *bias_;
- // data to store sampling result
- Blob<float> vis_sample_;
- // in order to implement Persistent Contrastive Divergence,
+ RBMLayer* hid_layer_;
+ Layer* input_layer_;
};
/**
* RBM hidden layer
*/
-class RBMHidLayer: public Layer {
+class RBMHidLayer: public RBMLayer {
public:
using Layer::ComputeFeature;
using Layer::ComputeGradient;
- void Setup(const LayerProto& proto,
- int npartitions) override;
- virtual bool is_hidlayer() const {
- return true;
- }
-
- void ComputeFeature(Phase phase,
- Metric *perf) override;
- void ComputeGradient(Phase phase) override;
- virtual Blob<float>* mutable_data(const Layer* from, Phase phase) {
- if (phase == kPositive)
- return &data_;
- else
- return &hid_sample_;
- }
- virtual const Blob<float>& data(const Layer* from, Phase phase) const {
- if (phase == kPositive)
- return data_;
- else
- return hid_sample_;
- }
- const vector<Param*> GetParams() const override {
- vector<Param*> params{weight_, bias_};
- return params;
- }
~RBMHidLayer();
-
+ void Setup(const LayerProto& proto, int npartitions) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
+  Blob<float>* Sample(int flag) override;
private:
- //! dimension of the hidden layer
- int hdim_;
- int vdim_; // dimension of visible layer
- int batchsize_;
- // batchsize of negative phase
- int neg_batchsize_;
- float scale_;
// whether use gaussian sampling
bool gaussian_;
- Blob<float> hid_sample_;
- Param* weight_, *bias_;
+ RBMLayer *vis_layer_;
};
/**
* fully connected layer
@@ -184,8 +114,8 @@ class InnerProductLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
ConnectionType src_neuron_connection(int k) const override {
// CHECK_LT(k, srclayers_.size());
@@ -212,7 +142,7 @@ class LabelLayer: public ParserLayer {
using ParserLayer::ParseRecords;
void Setup(const LayerProto& proto, int npartitions) override;
- void ParseRecords(Phase phase, const vector<Record>& records,
+ void ParseRecords(int flag, const vector<Record>& records,
Blob<float>* blob) override;
};
@@ -229,8 +159,8 @@ class LRNLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
protected:
//! shape of the bottom layer feature
@@ -247,7 +177,7 @@ class MnistLayer: public ParserLayer {
using ParserLayer::ParseRecords;
void Setup(const LayerProto& proto, int npartitions) override;
- void ParseRecords(Phase phase, const vector<Record>& records,
+ void ParseRecords(int flag, const vector<Record>& records,
Blob<float>* blob) override;
ConnectionType dst_layer_connection() const override {
return kOneToMany;
@@ -269,8 +199,8 @@ class PoolingLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
protected:
int kernel_, pad_, stride_;
@@ -284,8 +214,8 @@ class ReLULayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions = 1) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
};
class EuclideanLossLayer: public LossLayer {
@@ -294,8 +224,8 @@ class EuclideanLossLayer: public LossLayer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
int partition_dim() const override {
@@ -321,8 +251,8 @@ class SoftmaxLossLayer: public LossLayer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
/**
* softmax is not recommended for partition because it requires the whole
@@ -349,7 +279,7 @@ class RGBImageLayer: public ParserLayer {
using ParserLayer::ParseRecords;
void Setup(const LayerProto& proto, int npartitions) override;
- void ParseRecords(Phase phase, const vector<Record>& records,
+ void ParseRecords(int flag, const vector<Record>& records,
Blob<float>* blob) override;
private:
@@ -365,7 +295,7 @@ class ShardDataLayer: public DataLayer{
~ShardDataLayer();
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
+ void ComputeFeature(int flag, Metric *perf) override;
private:
DataShard* shard_;
@@ -382,8 +312,8 @@ class SigmoidLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
};
/**
@@ -397,8 +327,8 @@ class TanhLayer: public Layer {
using Layer::ComputeGradient;
void Setup(const LayerProto& proto, int npartitions) override;
- void ComputeFeature(Phase phase, Metric *perf) override;
- void ComputeGradient(Phase phase) override;
+ void ComputeFeature(int flag, Metric *perf) override;
+ void ComputeGradient(int flag) override;
private:
float outer_scale_, inner_scale_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/include/trainer/worker.h
----------------------------------------------------------------------
diff --git a/include/trainer/worker.h b/include/trainer/worker.h
index 86b1c90..35ce77e 100644
--- a/include/trainer/worker.h
+++ b/include/trainer/worker.h
@@ -192,15 +192,9 @@ class BPWorker: public Worker{
class CDWorker: public Worker{
public:
- ~CDWorker() {}
- void Init(int thread_id, int grp_id, int id) override;
void TrainOneBatch(int step, Metric* perf) override;
void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,
Metric* perf) override;
- void PositivePhase(int step, shared_ptr<NeuralNet> net, Metric* perf);
- void NegativePhase(int step, shared_ptr<NeuralNet> net, Metric* perf);
- void GradientPhase(int step, shared_ptr<NeuralNet> net);
- void LossPhase(int step, shared_ptr<NeuralNet> net, Metric* perf);
};
inline int BlobTrgt(int grp, int layer) {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/include/utils/param.h
----------------------------------------------------------------------
diff --git a/include/utils/param.h b/include/utils/param.h
index f7a0982..0d24e95 100644
--- a/include/utils/param.h
+++ b/include/utils/param.h
@@ -74,13 +74,14 @@ class Param {
static Param* Create(const ParamProto& proto);
Param();
virtual ~Param() {}
+ void Init(const ParamProto& proto) { proto_ = proto; }
/**
* Setup param object
*
* @param conf param configuration, include learning rate multiplier etc.
* @param shape one value per dimension
*/
- virtual void Setup(const ParamProto& conf, const std::vector<int>& shape);
+ virtual void Setup(const std::vector<int>& shape);
/*
* Fill the values according to init method, e.g., gaussian distribution.
*
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index e5045a3..9fa4b86 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -1,9 +1,12 @@
-#include "singa.h"
#include <cblas.h>
#include <glog/logging.h>
#include <string>
+#include "singa.h"
+
+#include "utils/tinydir.h"
+
namespace singa {
void Driver::Init(int argc, char **argv) {
@@ -89,6 +92,9 @@ void Driver::Submit(bool resume, const JobProto& jobConf) {
if (singa_conf_.has_log_dir())
SetupLog(singa_conf_.log_dir(), std::to_string(job_id_)
+ "-" + jobConf.name());
+ tinydir_dir workspace;
+ if (tinydir_open(&workspace, jobConf.cluster().workspace().c_str()) == -1)
+ LOG(FATAL) << "workspace does not exist: " << jobConf.cluster().workspace();
if (jobConf.num_openblas_threads() != 1)
LOG(WARNING) << "openblas with "
<< jobConf.num_openblas_threads() << " threads";
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/neuralnet/base_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/base_layer.cc b/src/neuralnet/base_layer.cc
index f995353..46f8b57 100644
--- a/src/neuralnet/base_layer.cc
+++ b/src/neuralnet/base_layer.cc
@@ -1,5 +1,3 @@
-#include <opencv2/highgui/highgui.hpp>
-#include <opencv2/imgproc/imgproc.hpp>
#include <cblas.h>
#include <math.h>
#include <cfloat>
@@ -24,14 +22,13 @@ void Layer::Setup(const LayerProto& proto, int npartitions) {
layer_proto_ = proto;
}
-const string Layer::DebugString(int step, Phase phase) {
+const string Layer::DebugString(int step, int flag) {
string ret =StringPrintf("Layer %10s ", name().c_str());
- if(data_.count() != 0)
- return ret;
- if(phase == kForward) {
- ret += StringPrintf("data %10s data norm1 %13.9f", data_.asum_data());
- }else if(phase == kBackward) {
- ret += StringPrintf("grad norm1 %13.9f\n", grad_.asum_data());
+ if ((flag & kForward) == kForward && data_.count() !=0) {
+ ret += StringPrintf("data norm1 %13.9f", data_.asum_data());
+ } else if ((flag & kBackward) == kBackward) {
+ if (grad_.count() != 0)
+ ret += StringPrintf("grad norm1 %13.9f\n", grad_.asum_data());
for(Param* p: GetParams())
ret += StringPrintf("param id %2d, name %10s,\
value norm1 %13.9f, grad norm1 %13.9f\n",
@@ -68,41 +65,41 @@ void ConcateLayer::Setup(const LayerProto& proto, int npartitions) {
grad_.Reshape(shape);
}
-void ConcateLayer::ComputeFeature(Phase phase, Metric *perf){
+void ConcateLayer::ComputeFeature(int flag, Metric *perf){
LOG(FATAL) << "Not implemented for Concate Layer";
}
-void ConcateLayer::ComputeGradient(Phase phase){
+void ConcateLayer::ComputeGradient(int flag){
LOG(FATAL) << "Not implemented for Concate Layer";
}
/************* Implementation for ParserLayer ***********/
-void ParserLayer::ComputeFeature(Phase phase, Metric *perf){
+void ParserLayer::ComputeFeature(int flag, Metric *perf){
CHECK_EQ(srclayers_.size(),1);
auto datalayer=static_cast<DataLayer*>(*srclayers_.begin());
- ParseRecords(phase, datalayer->records(), &data_);
+ ParseRecords(flag, datalayer->records(), &data_);
}
/************* Implementation for PrefetchLayer ***********/
-void PrefetchLayer::Prefetch(Phase phase){
+void PrefetchLayer::Prefetch(int flag){
//clock_t s=clock();
for(auto layer: sublayers_)
- layer->ComputeFeature(phase, nullptr);
+ layer->ComputeFeature(flag, nullptr);
//LOG(ERROR)<<(clock()-s)*1.0/CLOCKS_PER_SEC;
}
-void PrefetchLayer::ComputeFeature(Phase phase, Metric* perf){
+void PrefetchLayer::ComputeFeature(int flag, Metric* perf){
if(thread_.joinable())
thread_.join();
else{
- Prefetch(phase);
+ Prefetch(flag);
}
for(auto layer: sublayers_){
if(layer->is_parserlayer())
// TODO replace CopyFrom with Swap?
datablobs_.at(layer->name()).CopyFrom(layer->data(this));
}
- thread_=std::thread(&PrefetchLayer::Prefetch, this, phase);
+ thread_=std::thread(&PrefetchLayer::Prefetch, this, flag);
}
void PrefetchLayer::Setup(const LayerProto& proto, int npartitions) {
@@ -133,7 +130,7 @@ void PrefetchLayer::Setup(const LayerProto& proto, int npartitions) {
datablobs_[layer->name()]=Blob<float>(layer->data(this).shape());
}
-const Blob<float>& PrefetchLayer::data(const Layer* from, Phase phase) const {
+const Blob<float>& PrefetchLayer::data(const Layer* from) const {
LOG(FATAL) << " needs update";
if(from != nullptr) {
return datablobs_.at("");
@@ -143,7 +140,7 @@ const Blob<float>& PrefetchLayer::data(const Layer* from, Phase phase) const {
}
}
-Blob<float>* PrefetchLayer::mutable_data(const Layer* from, Phase phase) {
+Blob<float>* PrefetchLayer::mutable_data(const Layer* from) {
LOG(FATAL) << " needs update";
if(from!=nullptr){
return &(datablobs_.at(""));
@@ -194,7 +191,7 @@ int SliceLayer::SliceID(const Layer* layer) const {
return -1;
}
-const Blob<float>& SliceLayer::data(const Layer* layer, Phase phase) const {
+const Blob<float>& SliceLayer::data(const Layer* layer) const {
if(layer==nullptr)
return data_;
return datavec_[SliceID(layer)];
@@ -204,7 +201,7 @@ const Blob<float>& SliceLayer::grad(const Layer* layer) const {
return grad_;
return gradvec_[SliceID(layer)];
}
-Blob<float>* SliceLayer::mutable_data(const Layer* layer, Phase phase) {
+Blob<float>* SliceLayer::mutable_data(const Layer* layer) {
if(layer==nullptr)
return &data_;
return &datavec_[SliceID(layer)];
@@ -214,7 +211,7 @@ Blob<float>* SliceLayer::mutable_grad(const Layer* layer){
return &grad_;
return &gradvec_[SliceID(layer)];
}
-void SliceLayer::ComputeFeature(Phase phase, Metric *perf) {
+void SliceLayer::ComputeFeature(int flag, Metric *perf) {
CHECK_EQ(srclayers_.size(),1);
if(slice_dim_==0){
const auto& blob=srclayers_.at(0)->data(this);
@@ -226,7 +223,7 @@ void SliceLayer::ComputeFeature(Phase phase, Metric *perf) {
}
}
}
-void SliceLayer::ComputeGradient(Phase phase) {
+void SliceLayer::ComputeGradient(int flag) {
// LOG(FATAL) << "Not implemented";
}
@@ -240,11 +237,11 @@ void SplitLayer::Setup(const LayerProto& proto, int npartitions) {
grad_.Reshape(srclayers_[0]->data(this).shape());
}
-void SplitLayer::ComputeFeature(Phase phase, Metric *perf) {
+void SplitLayer::ComputeFeature(int flag, Metric *perf) {
LOG(FATAL) << "Not implemented";
}
-void SplitLayer::ComputeGradient(Phase phase) {
+void SplitLayer::ComputeGradient(int flag) {
LOG(FATAL) << "Not implemented";
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index b5c986e..29a2312 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -72,11 +72,11 @@ void ConvolutionLayer::Setup(const LayerProto& proto, int npartitions) {
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
- weight_->Setup(proto.param(0), vector<int>{num_filters_, col_height_});
- bias_->Setup(proto.param(1), vector<int>{num_filters_});
+ weight_->Setup(vector<int>{num_filters_, col_height_});
+ bias_->Setup(vector<int>{num_filters_});
}
-void ConvolutionLayer::ComputeFeature(Phase phase, Metric* perf){
+void ConvolutionLayer::ComputeFeature(int flag, Metric* perf){
auto src = Tensor4(srclayers_[0]->mutable_data(this));
auto data = Tensor3(&data_);
auto col = Tensor2(&col_data_);
@@ -93,7 +93,7 @@ void ConvolutionLayer::ComputeFeature(Phase phase, Metric* perf){
data+=broadcast<1>(bias, data.shape);
}
-void ConvolutionLayer::ComputeGradient(Phase phase) {
+void ConvolutionLayer::ComputeGradient(int flag) {
auto src = Tensor4(srclayers_[0]->mutable_data(this));
auto col = Tensor2(&col_data_);
auto weight = Tensor2(weight_->mutable_data());
@@ -137,9 +137,9 @@ void DropoutLayer::Setup(const LayerProto& proto, int npartitions) {
pdrop_ = proto.dropout_conf().dropout_ratio();
}
-void DropoutLayer::ComputeFeature(Phase phase, Metric* perf) {
+void DropoutLayer::ComputeFeature(int flag, Metric* perf) {
// check training
- if(phase != kTrain){//!training){
+ if((flag & kTrain) != kTrain) {
data_.CopyFrom(srclayers_[0]->data(this));
return;
}
@@ -152,7 +152,7 @@ void DropoutLayer::ComputeFeature(Phase phase, Metric* perf) {
data = src * mask;
}
-void DropoutLayer::ComputeGradient(Phase phase) {
+void DropoutLayer::ComputeGradient(int flag) {
auto mask = Tensor1(&mask_);
auto grad = Tensor1(&grad_);
auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
@@ -164,94 +164,69 @@ RBMVisLayer::~RBMVisLayer() {
delete bias_;
}
-void RBMVisLayer::Setup(const LayerProto& proto,
- int npartitions) {
+void RBMVisLayer::Setup(const LayerProto& proto, int npartitions) {
Layer::Setup(proto, npartitions);
CHECK_EQ(srclayers_.size(), 2);
- // hid_idx_: index indicating which srclayer is is hidden layer
- // data_idx_: index indicating which srclayer is data layer
- for (unsigned int i = 0; i < srclayers_.size(); i++)
- for (unsigned int j = 0; j < (srclayers_[i]-> dstlayers()).size(); j++)
- if (strcmp(((srclayers_[i]->dstlayers()).at(j)->name().c_str()),
- (this->name()).c_str()) == 0)
- hid_idx_ = i;
- for (unsigned int i = 0; i < srclayers_.size(); i++)
- if (i != static_cast<unsigned int>(hid_idx_) )
- data_idx_ = i;
- const auto& src = srclayers_[data_idx_]->data(this);
- is_first_iteration_vis_ = true;
+ hid_layer_ = nullptr;
+ for (auto src : srclayers_) {
+ for (auto dst : src->srclayers()) {
+ if (dst->name() == name()) {
+ CHECK(hid_layer_ == nullptr);
+ hid_layer_ = static_cast<RBMHidLayer*>(src);
+ }
+ }
+ }
+ input_layer_ = srclayers_[0] != hid_layer_ ? srclayers_[0]: srclayers_[1];
+ const auto& src = input_layer_->data(this);
batchsize_ = src.shape()[0];
- neg_batchsize_ = batchsize_;
- /*gibbs sampling size and input have the same size*/
- vdim_ = src.count()/batchsize_;
- hdim_ = proto.rbmvis_conf().num_output();
- data_.Reshape(vector<int>{batchsize_, vdim_}); // this is visible dimension
- vis_sample_.Reshape(vector<int>{neg_batchsize_, vdim_});
+ data_.ReshapeLike(src); // this is visible dimension
+ neg_data_.ReshapeLike(data_);
+ neg_sample_.ReshapeLike(data_);
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
- weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
- bias_->Setup(proto.param(1), vector<int>{vdim_});
-}
-
-void RBMVisLayer::ComputeFeature(Phase phase, Metric* perf) {
- if (phase == kPositive) { /*positive phase*/
- auto data = Tensor2(&data_);
- CHECK_EQ(srclayers_[data_idx_]->data(this).count(), batchsize_*vdim_);
- auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this));
- Copy(data, src);
- } else if (phase == kNegative) { /*negative phase*/
- auto hid_sample =
- Tensor2(srclayers_[hid_idx_]->mutable_data(this, kNegative));
- // fetch sampling results from hidden layer
- auto vis_sample = Tensor2(&vis_sample_);
- auto weight = Tensor2(weight_->mutable_data());
- auto bias = Tensor1(bias_->mutable_data());
- vis_sample = dot(hid_sample, weight);
- vis_sample+=repmat(bias, neg_batchsize_);
- vis_sample = F<op::sigmoid>(vis_sample);
+ bias_->Setup(vector<int>{src.count() / batchsize_});
+}
+Blob<float>* RBMVisLayer::Sample(int flag) {
+ Tensor<cpu, 2> sample, data;
+ if ((flag & kPositive) == kPositive) {
+ LOG(FATAL) << "RBMVisLayer can not be sampled for positive flag";
+ } else {
+ data = Tensor2(&neg_data_);
+ sample = Tensor2(&neg_sample_);
+ }
+ auto random = TSingleton<Random<cpu>>::Instance();
+ random->SampleBinary(sample, data);
+ return &neg_sample_;
+}
+void RBMVisLayer::ComputeFeature(int flag, Metric* perf) {
+ if ((flag & kPositive) == kPositive) { /*positive flag*/
+ data_.CopyFrom(input_layer_->data(this), true);
+ } else if ((flag & kNegative) == kNegative) { /*negative flag*/
+ auto hid_sample = Tensor2(hid_layer_->Sample(flag));
+ // fetch sampling results from hidden layer
+ auto data = Tensor2(&neg_data_);
+ auto weight = Tensor2(weight_->mutable_data());
+ auto bias = Tensor1(bias_->mutable_data());
+ data = dot(hid_sample, weight);
+ data += repmat(bias, batchsize_);
+ data = F<op::sigmoid>(data);
+ if ((flag & kTest) == kTest) {
+ const float *dptr = data_.cpu_data(), *rcns = neg_data_.cpu_data();
+ float err = 0.f;
+ for (int i = 0; i < data_.count(); i++) {
+ err += (dptr[i] - rcns[i]) * (dptr[i] - rcns[i]);
+ }
+ perf->Add("Squared Error", err / batchsize_);
}
+ }
}
-void RBMVisLayer::ComputeGradient(Phase phase) {
- auto data = Tensor2(&data_);
- auto hid_data = Tensor2(srclayers_[hid_idx_]->mutable_data(this, kPositive));
- auto vis_sample = Tensor2(&vis_sample_);
- auto hid_sample =
- Tensor2(srclayers_[hid_idx_]->mutable_data(this, kNegative));
- // fetch sampling results from hidden layer
- auto gweight = Tensor2(weight_->mutable_grad());
- auto gbias = Tensor1(bias_->mutable_grad());
- gbias = sum_rows(vis_sample);
- gbias -= sum_rows(data);
- gweight = dot(hid_sample.T(), vis_sample);
- gweight -= dot(hid_data.T(), data);
- gbias*=(1.0f)/(1.0f*batchsize_);
- gweight*=(1.0f)/(1.0f*batchsize_);
-}
-
-void RBMVisLayer::ComputeLoss(Metric* perf) {
- float loss_sqr = (0.0f);
- CHECK_EQ(srclayers_[data_idx_]->data(this).count(), batchsize_*vdim_);
- auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this));
- auto hid_data = Tensor2(srclayers_[hid_idx_]->mutable_data(this, kPositive));
- // gibbs using u
- auto weight = Tensor2(weight_->mutable_data());
- auto bias = Tensor1(bias_->mutable_data());
- Tensor<cpu, 2> reconstruct(Shape2(batchsize_, vdim_)); /*reconstruct error*/
- AllocSpace(reconstruct);
- reconstruct = dot(hid_data, weight);
- reconstruct+=repmat(bias, batchsize_);
- reconstruct = F<op::sigmoid>(reconstruct);
- float *src_dptr = src.dptr;
- for (int i = 0; i < vdim_*batchsize_; i++) {
- int recon_row = i / vdim_;
- int recon_col = i - recon_row * vdim_;
- loss_sqr += (src_dptr[i] - reconstruct[recon_row][recon_col]) *
- (src_dptr[i] - reconstruct[recon_row][recon_col]);
- }
- FreeSpace(reconstruct);
- perf->Reset();
- perf->Add("sqr_reconstruct_error", loss_sqr);
+void RBMVisLayer::ComputeGradient(int flag) {
+ auto vis_pos = Tensor2(&data_);
+ auto vis_neg = Tensor2(&neg_data_);
+ auto gbias = Tensor1(bias_->mutable_grad());
+ gbias = sum_rows(vis_neg);
+ gbias -= sum_rows(vis_pos);
}
/**************** Implementation for RBMHidLayer********************/
RBMHidLayer::~RBMHidLayer() {
@@ -263,84 +238,75 @@ void RBMHidLayer::Setup(const LayerProto& proto,
int npartitions) {
Layer::Setup(proto, npartitions);
CHECK_EQ(srclayers_.size(), 1);
- const auto& src_data = srclayers_[0]->data(this, kPositive);
- const auto& src_sample = srclayers_[0]->data(this, kNegative);
- scale_ = static_cast<float> (1.0f);
+ const auto& src_data = srclayers_[0]->data(this);
batchsize_ = src_data.shape()[0];
- neg_batchsize_ = src_sample.shape()[0];
vdim_ = src_data.count()/batchsize_;
hdim_ = proto.rbmhid_conf().hid_dim();
gaussian_ = proto.rbmhid_conf().gaussian();
data_.Reshape(vector<int>{batchsize_, hdim_});
- hid_sample_.Reshape(vector<int>{neg_batchsize_, hdim_});
+ neg_data_.ReshapeLike(data_);
+ sample_.ReshapeLike(data_);
+ neg_sample_.ReshapeLike(data_);
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
- bias_->Setup(proto.param(1), vector<int>{hdim_});
- weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
+ bias_->Setup(vector<int>{hdim_});
+ weight_->Setup(vector<int>{hdim_, vdim_});
+ vis_layer_ = static_cast<RBMVisLayer*> (srclayers_[0]);
}
-void RBMHidLayer::ComputeFeature(Phase phase, Metric* perf) {
- if (phase == kPositive) { /*postive phase*/
- auto data = Tensor2(&data_);
-
- auto hid_sample = Tensor2(&hid_sample_);
-
- CHECK_EQ(srclayers_[0]->data(this, kPositive).count(), batchsize_*vdim_);
- auto src = Tensor2(srclayers_[0]->mutable_data(this, kPositive));
- auto weight = Tensor2(weight_->mutable_data());
- auto bias = Tensor1(bias_->mutable_data());
- data = dot(src, weight.T());
- data += repmat(bias, batchsize_);
-
- if (!gaussian_)
- data = F<op::sigmoid>(data);
+Blob<float>* RBMHidLayer::Sample(int flag) {
+ Tensor<cpu, 2> sample, data;
+ if ((flag & kPositive) == kPositive) {
+ data = Tensor2(&data_);
+ sample = Tensor2(&sample_);
+ } else {
+ data = Tensor2(&neg_data_);
+ sample = Tensor2(&neg_sample_);
+ }
+ auto random = TSingleton<Random<cpu>>::Instance();
+ if (gaussian_) { // first gibbs
+ random->SampleGaussian(sample, 0.0f, 1.0f);
+ sample += data;
+ } else {
+ random->SampleBinary(sample, data);
+ }
+ return (flag & kPositive) == kPositive ? &sample_ : &neg_sample_;
+}
- Copy(hid_sample, data);
+void RBMHidLayer::ComputeFeature(int flag, Metric* perf) {
+ auto weight = Tensor2(weight_->mutable_data());
+ auto bias = Tensor1(bias_->mutable_data());
- if (gaussian_) { // first gibbs
- Tensor<cpu, 2> gaussian_sample(Shape2(batchsize_, hdim_));
- AllocSpace(gaussian_sample);
- auto random = TSingleton<Random<cpu>>::Instance();
- random->SampleGaussian(gaussian_sample, 0.0f, 1.0f);
- hid_sample += gaussian_sample;
- FreeSpace(gaussian_sample);
- } else {
- TSingleton<Random<cpu>>::Instance()->SampleBinary(hid_sample);
- }
+ Tensor<cpu, 2> data, src;
+  if ((flag & kPositive) == kPositive) { /*positive flag*/
+ data = Tensor2(&data_);
+ src = Tensor2(vis_layer_->mutable_data(this));
+ } else {
+ data = Tensor2(&neg_data_);
+ src = Tensor2(vis_layer_->Sample(flag));
+ }
+ data = dot(src, weight.T());
+ data += repmat(bias, batchsize_);
- } else if (phase == kNegative) { /*negative phase*/
- CHECK_EQ(srclayers_[0]->data(this, kNegative).count(),
- neg_batchsize_*vdim_);
- auto src_sample = Tensor2(srclayers_[0]->mutable_data(this, kNegative));
- auto hid_sample = Tensor2(&hid_sample_);
- auto bias = Tensor1(bias_->mutable_data());
- auto weight = Tensor2(weight_->mutable_data());
- hid_sample = dot(src_sample, weight.T());
- hid_sample += repmat(bias, neg_batchsize_);
- if (!gaussian_)
- hid_sample = F<op::sigmoid>(hid_sample);
- } else if (phase == kLoss) { /*test phase*/
- auto data = Tensor2(&data_); // data: sigmoid(Wv+b)
- if (gaussian_) {
- Tensor<cpu, 2> gaussian_sample(Shape2(batchsize_, hdim_));
- AllocSpace(gaussian_sample);
- auto random = TSingleton<Random<cpu>>::Instance();
- random->SampleGaussian(gaussian_sample, 0.0f, 1.0f);
- data += gaussian_sample;
- FreeSpace(gaussian_sample);
- }
- else
- TSingleton<Random<cpu>>::Instance()->SampleBinary(data);
- }
+ if (!gaussian_)
+ data = F<op::sigmoid>(data);
}
-void RBMHidLayer::ComputeGradient(Phase phase) {
- auto data = Tensor2(&data_);
- auto hid_sample = Tensor2(&hid_sample_);
+void RBMHidLayer::ComputeGradient(int flag) {
+ auto hid_pos = Tensor2(&data_);
+ auto hid_neg = Tensor2(&neg_data_);
+ auto vis_pos = Tensor2(vis_layer_->mutable_data(this));
+ auto vis_neg = Tensor2(vis_layer_->mutable_data(this));
+
auto gbias = Tensor1(bias_->mutable_grad());
- gbias = sum_rows(hid_sample);
- gbias -= sum_rows(data);
- gbias *= scale_/(1.0f*batchsize_);
+ gbias = sum_rows(hid_neg);
+ gbias -= sum_rows(hid_pos);
+ gbias /= batchsize_;
+
+ auto gweight = Tensor2(weight_->mutable_grad());
+ gweight = dot(hid_neg.T(), vis_neg);
+ gweight -= dot(hid_pos.T(), vis_pos);
+ gweight /= batchsize_;
}
/*********** Implementation for InnerProductLayer**********/
InnerProductLayer::~InnerProductLayer() {
@@ -362,13 +328,13 @@ void InnerProductLayer::Setup(const LayerProto& proto, int npartitions) {
weight_ = Param::Create(proto.param(0));
bias_ = Param::Create(proto.param(1));
if (transpose_)
- weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_});
+ weight_->Setup(vector<int>{vdim_, hdim_});
else
- weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_});
- bias_->Setup(proto.param(1), vector<int>{hdim_});
+ weight_->Setup(vector<int>{hdim_, vdim_});
+ bias_->Setup(vector<int>{hdim_});
}
-void InnerProductLayer::ComputeFeature(Phase phase, Metric* perf) {
+void InnerProductLayer::ComputeFeature(int flag, Metric* perf) {
auto data = Tensor2(&data_);
auto src = Tensor2(srclayers_[0]->mutable_data(this));
auto weight = Tensor2(weight_->mutable_data());
@@ -381,7 +347,7 @@ void InnerProductLayer::ComputeFeature(Phase phase, Metric* perf) {
data+=repmat(bias, batchsize_);
}
-void InnerProductLayer::ComputeGradient(Phase phas) {
+void InnerProductLayer::ComputeGradient(int flag) {
auto src = Tensor2(srclayers_[0]->mutable_data(this));
auto grad = Tensor2(&grad_);
auto weight = Tensor2(weight_->mutable_data());
@@ -411,7 +377,7 @@ void LabelLayer::Setup(const LayerProto& proto, int npartitions){
data_.Reshape(vector<int>{batchsize});
}
-void LabelLayer::ParseRecords(Phase phase, const vector<Record>& records,
+void LabelLayer::ParseRecords(int flag, const vector<Record>& records,
Blob<float>* blob){
int rid=0;
float *label= blob->mutable_cpu_data() ;
@@ -442,7 +408,7 @@ void LRNLayer::Setup(const LayerProto& proto, int npartitions) {
width_=s[3];
}
-void LRNLayer::ComputeFeature(Phase phase, Metric* perf) {
+void LRNLayer::ComputeFeature(int flag, Metric* perf) {
const float salpha = alpha_ / lsize_;
auto src = Tensor4(srclayers_[0]->mutable_data(this));
auto data = Tensor4(&data_);
@@ -452,7 +418,7 @@ void LRNLayer::ComputeFeature(Phase phase, Metric* perf) {
data = src * F<op::power>(norm, -beta_ );
}
-void LRNLayer::ComputeGradient(Phase phase) {
+void LRNLayer::ComputeGradient(int flag) {
const float salpha = alpha_ / lsize_;
auto src = Tensor4(srclayers_[0]->mutable_data(this));
auto norm = Tensor4(&norm_);
@@ -466,8 +432,10 @@ void LRNLayer::ComputeGradient(Phase phase) {
/**************** Implementation for MnistImageLayer******************/
-void MnistLayer::ParseRecords(Phase phase,
+void MnistLayer::ParseRecords(int flag,
const vector<Record>& records, Blob<float>* blob){
+ if ((flag & kForward) == 0)
+ return;
LOG_IF(ERROR, records.size()==0)<<"Empty records to parse";
int ndim=records.at(0).image().shape_size();
int inputsize =records.at(0).image().shape(ndim-1);
@@ -554,7 +522,7 @@ void PoolingLayer::Setup(const LayerProto& proto, int npartitions) {
grad_.ReshapeLike(data_);
}
-void PoolingLayer::ComputeFeature(Phase phase, Metric* perf) {
+void PoolingLayer::ComputeFeature(int flag, Metric* perf) {
auto src = Tensor4(srclayers_[0]->mutable_data(this));
auto data = Tensor4(&data_);
if(pool_ == PoolingProto_PoolMethod_MAX)
@@ -567,7 +535,7 @@ void PoolingLayer::ComputeFeature(Phase phase, Metric* perf) {
* partition only on num/channel dim
* assume grad and data have the same paritition
*/
-void PoolingLayer::ComputeGradient(Phase phase) {
+void PoolingLayer::ComputeGradient(int flag) {
auto src = Tensor4(srclayers_[0]->mutable_data(this));
auto gsrc = Tensor4(srclayers_[0]->mutable_grad(this));
auto data = Tensor4(&data_);
@@ -587,13 +555,13 @@ void ReLULayer::Setup(const LayerProto& proto, int npartitions) {
grad_.ReshapeLike(*(srclayers_[0]->mutable_grad(this)));
}
-void ReLULayer::ComputeFeature(Phase phase, Metric* perf) {
+void ReLULayer::ComputeFeature(int flag, Metric* perf) {
auto data = Tensor1(&data_);
auto src = Tensor1(srclayers_[0]->mutable_data(this));
data=F<op::relu>(src);
}
-void ReLULayer::ComputeGradient(Phase phase) {
+void ReLULayer::ComputeGradient(int flag) {
auto data = Tensor1(&data_);
auto grad = Tensor1(&grad_);
auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
@@ -602,8 +570,11 @@ void ReLULayer::ComputeGradient(Phase phase) {
/*************** Implementation for RGBImageLayer *************************/
-void RGBImageLayer::ParseRecords(Phase phase,
+void RGBImageLayer::ParseRecords(int flag,
const vector<Record>& records, Blob<float>* blob){
+ if ((flag & kForward) == 0)
+ return;
+
const vector<int>& s=blob->shape();
auto images = Tensor4(&data_);
const SingleLabelImageRecord& r=records.at(0).image();
@@ -617,8 +588,8 @@ void RGBImageLayer::ParseRecords(Phase phase,
const float* meandptr=mean_.cpu_data();
for(const Record& record: records){
auto image=images[rid];
- bool do_crop=cropsize_>0&&(phase == kTrain);
- bool do_mirror=mirror_&&rand()%2&&(phase == kTrain);
+ bool do_crop = cropsize_ > 0 && ((flag & kTrain) == kTrain);
+ bool do_mirror = mirror_ && rand() % 2 && ((flag & kTrain) == kTrain);
float* dptr=nullptr;
if(do_crop||do_mirror)
dptr=raw_image.dptr;
@@ -697,7 +668,10 @@ void RGBImageLayer::Setup(const LayerProto& proto, int npartitions) {
}
/***************Implementation for ShardDataLayer**************************/
-void ShardDataLayer::ComputeFeature(Phase phase, Metric* perf){
+void ShardDataLayer::ComputeFeature(int flag, Metric* perf){
+ if ((flag & kForward) == 0)
+ return;
+
if (shard_ == nullptr)
shard_ = new DataShard(layer_proto_.sharddata_conf().path(),
DataShard::kRead);
@@ -747,13 +721,13 @@ void SigmoidLayer::Setup(const LayerProto& proto, int npartitions) {
grad_.ReshapeLike(srclayers_[0]->grad(this));
}
-void SigmoidLayer::ComputeFeature(Phase phase, Metric* perf) {
+void SigmoidLayer::ComputeFeature(int flag, Metric* perf) {
auto data = Tensor1(&data_);
auto src = Tensor1(srclayers_[0]->mutable_data(this));
data = F<op::sigmoid>(src);
}
-void SigmoidLayer::ComputeGradient(Phase phase) {
+void SigmoidLayer::ComputeGradient(int flag) {
auto data = Tensor1(&data_);
auto grad = Tensor1(&grad_);
auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
@@ -766,13 +740,13 @@ void TanhLayer::Setup(const LayerProto& proto, int npartitions){
grad_.ReshapeLike(srclayers_[0]->grad(this));
}
-void TanhLayer::ComputeFeature(Phase phase, Metric* perf) {
+void TanhLayer::ComputeFeature(int flag, Metric* perf) {
auto data = Tensor1(&data_);
auto src = Tensor1(srclayers_[0]->mutable_data(this));
data=F<op::stanh>(src);
}
-void TanhLayer::ComputeGradient(Phase phase) {
+void TanhLayer::ComputeGradient(int flag) {
auto data = Tensor1(&data_);
auto grad = Tensor1(&grad_);
auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this));
@@ -787,7 +761,7 @@ void EuclideanLossLayer::Setup(const LayerProto& proto, int npartitions) {
dim_ = data_.count()/batchsize_;
metric_.Reshape(vector<int>{1});
}
-void EuclideanLossLayer::ComputeFeature(Phase phase, Metric* perf) {
+void EuclideanLossLayer::ComputeFeature(int flag, Metric* perf) {
const float* reconstruct_dptr = srclayers_[0]->data(this).cpu_data();
const float* input_dptr = srclayers_[1]->data(this).cpu_data();
float loss = 0;
@@ -805,7 +779,7 @@ void EuclideanLossLayer::ComputeFeature(Phase phase, Metric* perf) {
srclayers_[1]->data(this).cpu_data() + (batchsize_*dim_));
perf->Add("loss", loss/(1.0f*batchsize_));
}
-void EuclideanLossLayer::ComputeGradient(Phase phase) {
+void EuclideanLossLayer::ComputeGradient(int flag) {
const float* reconstruct_dptr = srclayers_[0]->data(this).cpu_data();
const float* input_dptr = srclayers_[1]->data(this).cpu_data();
Blob<float>* gsrcblob = srclayers_[0]->mutable_grad(this);
@@ -828,7 +802,7 @@ void SoftmaxLossLayer::Setup(const LayerProto& proto, int npartitions) {
metric_.Reshape(vector<int>{2});
scale_=proto.softmaxloss_conf().scale();
}
-void SoftmaxLossLayer::ComputeFeature(Phase phase, Metric* perf) {
+void SoftmaxLossLayer::ComputeFeature(int flag, Metric* perf) {
Shape<2> s=Shape2(batchsize_, dim_);
Tensor<cpu, 2> prob(data_.mutable_cpu_data(), s);
Tensor<cpu, 2> src(srclayers_[0]->mutable_data(this)->mutable_cpu_data(), s);
@@ -863,7 +837,7 @@ void SoftmaxLossLayer::ComputeFeature(Phase phase, Metric* perf) {
perf->Add("accuracy", precision*scale_/(1.0f*batchsize_));
}
-void SoftmaxLossLayer::ComputeGradient(Phase phase) {
+void SoftmaxLossLayer::ComputeGradient(int flag) {
const float* label=srclayers_[1]->data(this).cpu_data();
Blob<float>* gsrcblob=srclayers_[0]->mutable_grad(this);
gsrcblob->CopyFrom(data_);
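
For reference, the quantities assembled by RBMVisLayer::ComputeGradient and
RBMHidLayer::ComputeGradient above are the usual CD-1 gradient estimates. With
$W \in \mathbb{R}^{hdim \times vdim}$, $(v_n, h_n)$ the positive-phase activations
and $(v'_n, h'_n)$ the negative-phase reconstructions of example $n$ in a
mini-batch of size $m$, they amount to

$$
\nabla_W \approx \frac{1}{m}\sum_{n=1}^{m}\left(h'_n {v'_n}^{\top} - h_n v_n^{\top}\right),\qquad
\nabla_{b_h} \approx \frac{1}{m}\sum_{n=1}^{m}\left(h'_n - h_n\right),\qquad
\nabla_{b_v} \approx \sum_{n=1}^{m}\left(v'_n - v_n\right),
$$

where the visible-bias term is left unaveraged in the snippet above. The SGD
updater later subtracts these gradients, which yields the standard
positive-minus-negative CD update.
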
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 7f030ac..1c79aea 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -21,18 +21,12 @@ message JobProto {
required string name = 1;
// neural net consits of a set of connected layers
required NetProto neuralnet = 3;
- // algorithms calculating gradients for one mini-batch/iteration
- optional TrainOneBatchAlg alg = 5 [default = kUserAlg];
- // user defined algorithm
- optional string user_alg = 6;
+ // algorithm for computing gradients over one mini-batch
+ required AlgProto train_one_batch = 5;
// configuration of SGD updater, including learning rate, etc.
required UpdaterProto updater = 7;
// cluster toplogy conf
required ClusterProto cluster = 9;
-
- // for setting CD fields
- optional CDProto cd_conf = 12;
-
// total num of steps for training
required int32 train_steps = 16;
// frequency of displaying training info
@@ -86,6 +80,16 @@ message JobProto {
// Protos used by JobProto
// -----------------------
+message AlgProto {
+ // algorithms calculating gradients for one mini-batch/iteration
+ optional AlgType alg = 1 [default = kUserAlg];
+ // user defined algorithm
+ optional string user_alg = 2;
+ // for setting CD fields
+ optional CDProto cd_conf = 10;
+
+ extensions 101 to 200;
+}
message NetProto {
repeated LayerProto layer = 1;
// partitioning type for parallelism
@@ -140,7 +144,7 @@ message ClusterProto {
message CDProto {
//number of steps for gibbs sampling
- optional int32 pcd_k = 1 [default = 1];
+ optional int32 cd_k = 1 [default = 1];
}
message LayerProto {
@@ -182,8 +186,6 @@ message LayerProto {
optional PrefetchProto prefetch_conf = 44;
// configuration for rbmhid layer
optional RBMHidProto rbmhid_conf = 49;
- // configuration for rbmvis layer
- optional RBMVisProto rbmvis_conf = 48;
// configuration for rectified linear unit layer
optional ReLUProto relu_conf = 38;
// configuration for rgb image parser layer
@@ -365,11 +367,6 @@ message DropoutProto {
optional float dropout_ratio = 30 [default = 0.5];
}
-message RBMVisProto {
- optional int32 num_output = 1; // The number of outputs for the layer
- optional bool bias_term = 2 [default = true]; // whether to have bias terms
-}
-
message RBMHidProto {
optional int32 hid_dim = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
@@ -559,16 +556,16 @@ enum PartitionType {
}
enum Phase {
- kTrain = 0;
- kValidation = 1;
- kTest= 2;
+ kTrain = 1;
+ kValidation = 2;
+ kTest= 4;
// positive phase for contrastive divergence algorithm
- kPositive = 3;
+ kPositive = 8;
// negative phase for contrastive divergence algorithm
- kNegative = 4;
- kForward = 5;
- kBackward = 6;
- kLoss = 7;
+ kNegative = 16;
+ kForward = 32;
+ kBackward = 64;
+ kLoss = 128;
}
enum ParamType {
@@ -578,7 +575,7 @@ enum ParamType {
kUser = 103;
}
-enum TrainOneBatchAlg {
+enum AlgType {
// Back-propagation algorithm for feed-forward models, e.g., CNN and RNN
kBP = 1;
// Contrastive Divergence algorithm for RBM, DBM, etc.
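
Because the Phase values above are now distinct powers of two, several phases can
be packed into one int and tested independently with bit masks; this is exactly
how the int flag argument introduced throughout this patch is used. A small
illustration (assuming the generated enum constants are visible, as in the layer
and worker code):

int flag = kTrain | kForward;                       // combine two phases
bool training = (flag & kTrain) == kTrain;          // true
bool backward = (flag & kBackward) == kBackward;    // false

This is why calls like layer->ComputeFeature(kPositive | kForward, perf) and
checks like (flag & kTest) == kTest can share the same argument.
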
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/trainer/worker.cc
----------------------------------------------------------------------
diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc
index f112b17..a22a8ef 100644
--- a/src/trainer/worker.cc
+++ b/src/trainer/worker.cc
@@ -13,12 +13,14 @@ using std::thread;
Worker* Worker::Create(const JobProto& proto) {
auto factory = Singleton<Factory<singa::Worker>>::Instance();
Worker* worker = nullptr;
- if (proto.has_user_alg())
- worker = factory->Create(proto.user_alg());
+ const auto& conf = proto.train_one_batch();
+ if (conf.has_user_alg())
+ worker = factory->Create(conf.user_alg());
else
- worker = factory->Create(proto.alg());
+ worker = factory->Create(conf.alg());
return worker;
}
+
void Worker::Init(int thread_id, int grp_id, int id) {
thread_id_ = thread_id;
grp_id_ = grp_id;
@@ -63,7 +65,7 @@ void Worker::InitLocalParams() {
// the param from previous checkpoint files will be overwritten by
// the param with the same name in later checkpoint files.
for (const auto checkpoint : job_conf_.checkpoint_path()) {
- LOG(INFO) << "Load from checkpoint file " << checkpoint;
+ LOG(ERROR) << "Load from checkpoint file " << checkpoint;
BlobProtos bps;
ReadProtoFromBinaryFile(checkpoint.c_str(), &bps);
for (int i = 0; i < bps.name_size(); i++) {
@@ -342,11 +344,11 @@ void BPWorker::Forward(
Collect(p, step);
}
}
- layer->ComputeFeature(phase, perf);
+ layer->ComputeFeature(phase | kForward, perf);
if (layer->is_bridgesrclayer()) // send data to other workers
SendBlobs(true, false, static_cast<BridgeLayer*>(layer), net);
if (DisplayDebugInfo(step))
- LOG(INFO) << layer->DebugString(step, kForward);
+ LOG(INFO) << layer->DebugString(step, phase | kForward);
}
}
}
@@ -359,9 +361,9 @@ void BPWorker::Backward(int step, shared_ptr<NeuralNet> net) {
if(layer->is_bridgesrclayer()) {
// ReceiveBlobs(false, true, layer, net);
}
- layer->ComputeGradient(kTrain);
+ layer->ComputeGradient(kTrain | kBackward);
if (DisplayDebugInfo(step))
- LOG(INFO) << layer->DebugString(step, kBackward);
+ LOG(INFO) << layer->DebugString(step, kTrain | kBackward);
for (Param* p : layer->GetParams())
Update(p, step);
if (layer->is_bridgedstlayer()) {
@@ -381,72 +383,34 @@ void BPWorker::TestOneBatch(int step, Phase phase,
Forward(step, phase, net, perf);
}
/****************************CDWorker**********************************/
-void CDWorker::Init(int thread_id, int group_id, int worker_id) {
- Worker::Init(thread_id, group_id, worker_id);
-}
-
-void CDWorker::PositivePhase(int step,
- shared_ptr<NeuralNet> net, Metric* perf) {
- auto& layers = net->layers();
- // LOG(ERROR)<<"Positive Phase";
- for (auto& layer : layers) {
- for (Param* p : layer->GetParams()) { // wait until param is updated
+void CDWorker::TrainOneBatch(int step, Metric* perf) {
+ const auto& layers = train_net_->layers();
+ for (auto* layer : layers) {
+ for (Param* p : layer->GetParams()) // wait until param is updated
Collect(p, step);
- }
- layer->ComputeFeature(kPositive, perf);
+ layer->ComputeFeature(kPositive | kForward, perf);
}
-}
-
-void CDWorker::NegativePhase(int step,
- shared_ptr<NeuralNet> net, Metric* perf) {
-// for negative phase, gibbs sampling only concerns RBM bottom and top layer
- auto& layers = net->layers();
- // LOG(ERROR)<<"Negative Phase";
- for (auto& layer : layers) {
- if (layer->is_vislayer() || layer->is_hidlayer()) {
- layer->ComputeFeature(kNegative, perf);
- }
- }
-}
-
-void CDWorker::GradientPhase(int step, shared_ptr<NeuralNet> net) {
- auto& layers = net->layers();
- // LOG(ERROR)<<"Gradient Phase";
- for (auto& layer : layers) {
- if (layer->is_vislayer() || layer->is_hidlayer()) {
- layer->ComputeGradient(kTrain);
- for (Param* p : layer->GetParams()) {
- Update(p, step);
- }
+ for (auto* layer : layers)
+ layer->ComputeFeature(kNegative | kTest, perf);
+ for (int i = 1; i < job_conf_.train_one_batch().cd_conf().cd_k(); i++) {
+ for (auto* layer : layers) {
+ layer->ComputeFeature(kNegative, perf);
}
}
-}
-
-void CDWorker::LossPhase(int step, shared_ptr<NeuralNet> net, Metric* perf) {
- auto& layers = net->layers();
- // LOG(ERROR)<<"Loss Phase";
- for (auto& layer : layers) {
- if (layer->is_hidlayer()) {
- layer->ComputeFeature(kLoss, perf);
+ for (auto* layer : layers) {
+ layer->ComputeGradient(kTrain);
+ for (Param* p : layer->GetParams()) {
+ Update(p, step);
}
}
- for (auto& layer : layers) {
- if (layer->is_vislayer()) {
- layer->ComputeLoss(perf);
- }
- }
-}
-
-void CDWorker::TrainOneBatch(int step, Metric* perf) {
- PositivePhase(step, train_net_, perf);
- NegativePhase(step, train_net_, perf);
- GradientPhase(step, train_net_);
- LossPhase(step, train_net_, perf);
}
void CDWorker::TestOneBatch(int step, Phase phase,
- shared_ptr<NeuralNet> net, Metric* perf) {
- PositivePhase(step, test_net_, perf);
- LossPhase(step, test_net_, perf);
+ shared_ptr<NeuralNet> net, Metric* perf) {
+ auto& layers = net->layers();
+ for (auto layer : layers)
+ layer->ComputeFeature(kPositive | kForward, perf);
+ for (auto layer : layers)
+ layer->ComputeFeature(kNegative | kTest, perf);
}
} // namespace singa
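
Reading the new CDWorker::TrainOneBatch above: the first loop is the positive
phase (kPositive | kForward), the next sweep is the first Gibbs step of the
negative phase (kNegative | kTest, which also lets RBMVisLayer record the
"Squared Error" metric), and the inner for-loop runs the remaining cd_k - 1 Gibbs
steps. With the default cd_k of 1 from CDProto that loop body never executes, so
each mini-batch does one positive and one negative sweep (plain CD-1) before
gradients are computed and parameters updated.
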
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index 1888380..d13faea 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -40,7 +40,7 @@ string IntVecToString(const vector<int>& vec) {
* * Formatted string.
* */
string VStringPrintf(string fmt, va_list l) {
- char buffer[32768];
+ char buffer[4096];
vsnprintf(buffer, sizeof(buffer), fmt.c_str(), l);
return string(buffer);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fbbcaafd/src/utils/param.cc
----------------------------------------------------------------------
diff --git a/src/utils/param.cc b/src/utils/param.cc
index 67f14ab..69f697b 100644
--- a/src/utils/param.cc
+++ b/src/utils/param.cc
@@ -73,17 +73,18 @@ Param* Param::Create(const ParamProto& proto) {
p = factory->Create(proto.user_type());
else
p = factory->Create(proto.type());
+ p->Init(proto);
return p;
}
Param::Param():local_version_(-1), slice_start_(0), num_slices_(0),
num_pending_requests_(0), data_(nullptr) {
}
-void Param::Setup(const ParamProto& proto, const vector<int>& shape) {
+
+void Param::Setup(const vector<int>& shape) {
data_ = std::make_shared<Blob<float>>(shape);
grad_.Reshape(shape);
history_.Reshape(shape);
- proto_.CopyFrom(proto);
}
void Param::AddSlice(int slice_id, int size) {
@@ -178,7 +179,8 @@ Msg* Param::HandlePutMsg(Msg** msg, bool reserve) {
proto.set_lr_scale(lr);
proto.set_wd_scale(wc);
vector<int> shape{size};
- Setup(proto, shape);
+ Init(proto);
+ Setup(shape);
if (ptr == nullptr) {
CHECK((*msg)->NextFrame());
CHECK_EQ(size* sizeof(float), (*msg)->FrameSize());
@@ -298,6 +300,8 @@ void Param::ShareFrom(const Param& other) {
other.data_->shape().begin()));
}
data_ = other.data_;
+ if (grad_.count() == 0)
+ grad_.Reshape(data_->shape());
slice_offset_ = other.slice_offset_;
slice_size_ = other.slice_size_;
slice_start_ = other.slice_start_;