Posted to commits@singa.apache.org by wa...@apache.org on 2015/06/24 15:35:51 UTC
[08/18] incubator-singa git commit: SINGA-21 Code review-2
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/cifar10/model-lmdb.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/model-lmdb.conf b/examples/cifar10/model-lmdb.conf
deleted file mode 100644
index ea22ccd..0000000
--- a/examples/cifar10/model-lmdb.conf
+++ /dev/null
@@ -1,218 +0,0 @@
-name: "cifar10-convnet"
-train_steps: 70000
-test_steps:100
-test_frequency:1000
-display_frequency:50
-updater{
- momentum:0.9
- weight_decay:0.004
- learning_rate_change_method:kFixedStep
- step:0
- step:60000
- step:65000
- step_lr:0.001
- step_lr:0.0001
- step_lr:0.00001
-}
-neuralnet {
-layer {
- name: "data"
- type: "kLMDBData"
- data_param {
- path: "examples/cifar10/cifar10_train_lmdb"
- batchsize: 100
- }
- exclude: kTest
-}
-
-layer {
- name: "data"
- type: "kLMDBData"
- data_param {
- path: "examples/cifar10/cifar10_test_lmdb"
- batchsize: 100
- }
- exclude: kTrain
-}
-
-layer{
- name:"rgb"
- type: "kRGBImage"
- srclayers: "data"
- rgbimage_param {
- meanfile: "examples/cifar10/mean.binaryproto"
- }
-}
-
-layer{
- name: "label"
- type: "kLabel"
- srclayers: "data"
-}
-layer {
- name: "conv1"
- type: "kConvolution"
- srclayers: "rgb"
- convolution_param {
- num_filters: 32
- kernel: 5
- stride: 1
- pad:2
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.0001
- learning_rate_multiplier:1.0
- }
- param{
- name: "bias"
- init_method: kConstant
- learning_rate_multiplier:2.0
- value:0
- }
-}
-layer {
- name: "pool1"
- type: "kPooling"
- srclayers: "conv1"
- pooling_param {
- pool: MAX
- kernel: 3
- stride: 2
- }
-}
-layer {
- name: "relu1"
- type: "kReLU"
- srclayers:"pool1"
-}
-layer {
- name: "norm1"
- type: "kLRN"
- lrn_param {
- norm_region: WITHIN_CHANNEL
- local_size: 3
- alpha: 5e-05
- beta: 0.75
- }
- srclayers:"relu1"
-}
-layer {
- name: "conv2"
- type: "kConvolution"
- srclayers: "norm1"
- convolution_param {
- num_filters: 32
- kernel: 5
- stride: 1
- pad:2
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.01
- learning_rate_multiplier:1.0
- }
- param{
- name: "bias"
- init_method: kConstant
- learning_rate_multiplier:2.0
- value:0
- }
-}
-layer {
- name: "relu2"
- type: "kReLU"
- srclayers:"conv2"
-}
-layer {
- name: "pool2"
- type: "kPooling"
- srclayers: "relu2"
- pooling_param {
- pool: MAX
- kernel: 3
- stride: 2
- }
-}
-layer {
- name: "norm2"
- type: "kLRN"
- lrn_param {
- norm_region: WITHIN_CHANNEL
- local_size: 3
- alpha: 5e-05
- beta: 0.75
- }
- srclayers:"pool2"
-}
-layer {
- name: "conv3"
- type: "kConvolution"
- srclayers: "norm2"
- convolution_param {
- num_filters: 64
- kernel: 5
- stride: 1
- pad:2
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.01
- }
- param{
- name: "bias"
- init_method: kConstant
- value:0
- }
-}
-layer {
- name: "relu3"
- type: "kReLU"
- srclayers:"conv3"
-}
-layer {
- name: "pool3"
- type: "kPooling"
- srclayers: "relu3"
- pooling_param {
- pool: AVE
- kernel: 3
- stride: 2
- }
-}
-layer {
- name: "ip1"
- type: "kInnerProduct"
- srclayers:"pool3"
- inner_product_param {
- num_output: 10
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.01
- learning_rate_multiplier:1.0
- weight_decay_multiplier:250
- }
- param{
- name: "bias"
- init_method: kConstant
- learning_rate_multiplier:2.0
- weight_decay_multiplier:0
- value:0
- }
-}
-
-layer{
- name: "loss"
- type:"kSoftmaxLoss"
- softmaxloss_param{
- topk:1
- }
- srclayers:"ip1"
- srclayers:"label"
-}
-}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/cifar10/model-prefetch.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/model-prefetch.conf b/examples/cifar10/model-prefetch.conf
deleted file mode 100644
index 220a4b9..0000000
--- a/examples/cifar10/model-prefetch.conf
+++ /dev/null
@@ -1,241 +0,0 @@
-name: "cifar10-convnet"
-train_steps: 70000
-test_steps:100
-test_frequency:1000
-display_frequency:50
-updater{
- momentum:0.9
- weight_decay:0.004
- learning_rate_change_method:kFixedStep
- step:0
- step:60000
- step:65000
- step_lr:0.001
- step_lr:0.0001
- step_lr:0.00001
-}
-neuralnet {
-layer{
- name: "prefetch"
- type: "kPrefetch"
- sublayers {
- name: "data"
- type: "kShardData"
- data_param {
- path: "examples/cifar10/cifar10_train_shard"
- batchsize: 100
- }
- }
- sublayers{
- name:"rgb"
- type: "kRGBImage"
- srclayers: "data"
- rgbimage_param {
- meanfile: "examples/cifar10/image_mean.bin"
- }
- }
- sublayers{
- name: "label"
- type: "kLabel"
- srclayers: "data"
- }
- exclude: kTest
-}
-
-layer{
- name: "prefetch"
- type: "kPrefetch"
- sublayers {
- name: "data"
- type: "kShardData"
- data_param {
- path: "examples/cifar10/cifar10_test_shard"
- batchsize: 100
- }
- }
- sublayers{
- name:"rgb"
- type: "kRGBImage"
- srclayers: "data"
- rgbimage_param {
- meanfile: "examples/cifar10/image_mean.bin"
- }
- }
- sublayers{
- name: "label"
- type: "kLabel"
- srclayers: "data"
- }
- exclude: kTrain
-}
-
-layer {
- name: "conv1"
- type: "kConvolution"
- srclayers: "prefetch"
- datablob: "rgb"
- convolution_param {
- num_filters: 32
- kernel: 5
- stride: 1
- pad:2
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.0001
- learning_rate_multiplier:1.0
- }
- param{
- name: "bias"
- init_method: kConstant
- learning_rate_multiplier:2.0
- value:0
- }
-}
-
-layer {
- name: "pool1"
- type: "kPooling"
- srclayers: "conv1"
- pooling_param {
- pool: MAX
- kernel: 3
- stride: 2
- }
-}
-layer {
- name: "relu1"
- type: "kReLU"
- srclayers:"pool1"
-}
-layer {
- name: "norm1"
- type: "kLRN"
- lrn_param {
- norm_region: WITHIN_CHANNEL
- local_size: 3
- alpha: 5e-05
- beta: 0.75
- }
- srclayers:"relu1"
-}
-layer {
- name: "conv2"
- type: "kConvolution"
- srclayers: "norm1"
- convolution_param {
- num_filters: 32
- kernel: 5
- stride: 1
- pad:2
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.01
- learning_rate_multiplier:1.0
- }
- param{
- name: "bias"
- init_method: kConstant
- learning_rate_multiplier:2.0
- value:0
- }
-}
-layer {
- name: "relu2"
- type: "kReLU"
- srclayers:"conv2"
-}
-layer {
- name: "pool2"
- type: "kPooling"
- srclayers: "relu2"
- pooling_param {
- pool: MAX
- kernel: 3
- stride: 2
- }
-}
-layer {
- name: "norm2"
- type: "kLRN"
- lrn_param {
- norm_region: WITHIN_CHANNEL
- local_size: 3
- alpha: 5e-05
- beta: 0.75
- }
- srclayers:"pool2"
-}
-layer {
- name: "conv3"
- type: "kConvolution"
- srclayers: "norm2"
- convolution_param {
- num_filters: 64
- kernel: 5
- stride: 1
- pad:2
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.01
- }
- param{
- name: "bias"
- init_method: kConstant
- value:0
- }
-}
-layer {
- name: "relu3"
- type: "kReLU"
- srclayers:"conv3"
-}
-layer {
- name: "pool3"
- type: "kPooling"
- srclayers: "relu3"
- pooling_param {
- pool: AVE
- kernel: 3
- stride: 2
- }
-}
-layer {
- name: "ip1"
- type: "kInnerProduct"
- srclayers:"pool3"
- inner_product_param {
- num_output: 10
- }
- param{
- name: "weight"
- init_method:kGaussian
- std:0.01
- learning_rate_multiplier:1.0
- weight_decay_multiplier:250
- }
- param{
- name: "bias"
- init_method: kConstant
- learning_rate_multiplier:2.0
- weight_decay_multiplier:0
- value:0
- }
-}
-
-layer{
- name: "loss"
- type:"kSoftmaxLoss"
- softmaxloss_param{
- topk:1
- }
- srclayers:"ip1"
- srclayers:"prefetch"
- datablob: "label"
-}
-}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/cifar10/model.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/model.conf b/examples/cifar10/model.conf
index bfd7683..0d6e4fd 100644
--- a/examples/cifar10/model.conf
+++ b/examples/cifar10/model.conf
@@ -5,22 +5,24 @@ test_frequency:300
display_frequency:30
alg: kBackPropagation
updater{
- momentum:0.0
weight_decay:0.004
- learning_rate_change_method:kFixedStep
- step:0
- step:60000
- step:65000
- step_lr:0.001
- step_lr:0.0001
- step_lr:0.00001
+ lr_change: kFixedStep
+ type: kSGD
+ fixedstep_conf:{
+ step:0
+ step:60000
+ step:65000
+ step_lr:0.001
+ step_lr:0.0001
+ step_lr:0.00001
+ }
}
neuralnet {
partition_type: kDataPartition
layer{
name: "data"
- type: "kShardData"
- data_param {
+ type: kShardData
+ sharddata_conf {
path: "examples/cifar10/cifar10_train_shard"
batchsize: 16
}
@@ -28,8 +30,8 @@ layer{
}
layer{
name: "data"
- type: "kShardData"
- data_param {
+ type: kShardData
+ sharddata_conf {
path: "examples/cifar10/cifar10_test_shard"
batchsize: 100
}
@@ -37,23 +39,23 @@ layer{
}
layer{
name:"rgb"
- type: "kRGBImage"
+ type: kRGBImage
srclayers: "data"
- rgbimage_param {
+ rgbimage_conf {
meanfile: "examples/cifar10/image_mean.bin"
}
}
layer{
name: "label"
- type: "kLabel"
+ type: kLabel
srclayers: "data"
}
layer {
name: "conv1"
- type: "kConvolution"
+ type: kConvolution
srclayers: "rgb"
- convolution_param {
+ convolution_conf {
num_filters: 32
kernel: 5
stride: 1
@@ -75,9 +77,9 @@ layer {
layer {
name: "pool1"
- type: "kPooling"
+ type: kPooling
srclayers: "conv1"
- pooling_param {
+ pooling_conf {
pool: MAX
kernel: 3
stride: 2
@@ -85,13 +87,13 @@ layer {
}
layer {
name: "relu1"
- type: "kReLU"
+ type: kReLU
srclayers:"pool1"
}
layer {
name: "norm1"
- type: "kLRN"
- lrn_param {
+ type: kLRN
+ lrn_conf {
norm_region: WITHIN_CHANNEL
local_size: 3
alpha: 5e-05
@@ -101,9 +103,9 @@ layer {
}
layer {
name: "conv2"
- type: "kConvolution"
+ type: kConvolution
srclayers: "norm1"
- convolution_param {
+ convolution_conf {
num_filters: 32
kernel: 5
stride: 1
@@ -124,14 +126,14 @@ layer {
}
layer {
name: "relu2"
- type: "kReLU"
+ type: kReLU
srclayers:"conv2"
}
layer {
name: "pool2"
- type: "kPooling"
+ type: kPooling
srclayers: "relu2"
- pooling_param {
+ pooling_conf {
pool: MAX
kernel: 3
stride: 2
@@ -139,8 +141,8 @@ layer {
}
layer {
name: "norm2"
- type: "kLRN"
- lrn_param {
+ type: kLRN
+ lrn_conf {
norm_region: WITHIN_CHANNEL
local_size: 3
alpha: 5e-05
@@ -150,9 +152,9 @@ layer {
}
layer {
name: "conv3"
- type: "kConvolution"
+ type: kConvolution
srclayers: "norm2"
- convolution_param {
+ convolution_conf {
num_filters: 64
kernel: 5
stride: 1
@@ -171,14 +173,14 @@ layer {
}
layer {
name: "relu3"
- type: "kReLU"
+ type: kReLU
srclayers:"conv3"
}
layer {
name: "pool3"
- type: "kPooling"
+ type: kPooling
srclayers: "relu3"
- pooling_param {
+ pooling_conf {
pool: AVE
kernel: 3
stride: 2
@@ -186,9 +188,9 @@ layer {
}
layer {
name: "ip1"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"pool3"
- inner_product_param {
+ innerproduct_conf {
num_output: 10
}
param{
@@ -209,8 +211,8 @@ layer {
layer{
name: "loss"
- type:"kSoftmaxLoss"
- softmaxloss_param{
+ type: kSoftmaxLoss
+ softmaxloss_conf{
topk:1
}
srclayers:"ip1"
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/mnist/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_shard.cc b/examples/mnist/create_shard.cc
index f545b80..09229ac 100644
--- a/examples/mnist/create_shard.cc
+++ b/examples/mnist/create_shard.cc
@@ -17,7 +17,7 @@
#include "utils/data_shard.h"
#include "utils/common.h"
-#include "proto/model.pb.h"
+#include "proto/common.pb.h"
using singa::DataShard;
using singa::WriteProtoToBinaryFile;
@@ -85,21 +85,6 @@ void create_shard(const char* image_filename, const char* label_filename,
}
int main(int argc, char** argv) {
-/*
-#ifndef GFLAGS_GFLAGS_H_
- namespace gflags = google;
-#endif
- gflags::SetUsageMessage("This program create a DataShard for a MNIST dataset\n"
- "Usage:\n"
- " create_shard.bin input_image_file input_label_file output_db_file\n"
- "The MNIST dataset could be downloaded at\n"
- " http://yann.lecun.com/exdb/mnist/\n"
- "You should gunzip them after downloading.");
- gflags::ParseCommandLineFlags(&argc, &argv, true);
- gflags::ShowUsageWithFlagsRestrict(argv[0],
- "examples/mnist/create_shard.bin");
-*/
-
if (argc != 4) {
std::cout<<"This program create a DataShard for a MNIST dataset\n"
"Usage:\n"
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/mnist/mlp-lmdb.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/mlp-lmdb.conf b/examples/mnist/mlp-lmdb.conf
deleted file mode 100644
index d0ed08f..0000000
--- a/examples/mnist/mlp-lmdb.conf
+++ /dev/null
@@ -1,223 +0,0 @@
-name: "deep-big-simple-mlp"
-train_steps: 10000
-test_steps:10
-test_frequency:60
-display_frequency:30
-checkpoint_frequency:120
-updater{
- base_learning_rate: 0.001
- learning_rate_change_method: kStep
- learning_rate_change_frequency: 60
- gamma: 0.997
- param_type: "Param"
-}
-
-neuralnet {
-layer {
- name: "data"
- type: "kLMDBData"
- data_param {
- path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb"
- batchsize: 1000
- random_skip: 10000
- }
- exclude: kTest
-}
-
-layer {
- name: "data"
- type: "kLMDBData"
- data_param {
- path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb"
- batchsize: 1000
- }
- exclude: kTrain
-}
-
-layer{
- name:"mnist"
- type: "kMnistImage"
- srclayers: "data"
- mnist_param {
-# sigma: 6
-# alpha: 38
-# gamma: 15
-# kernel: 21
-# elastic_freq:100
-# beta:15
-# resize: 29
- norm_a: 127.5
- norm_b: 1
- }
-}
-
-
-layer{
- name: "label"
- type: "kLabel"
- srclayers: "data"
-}
-
-layer{
- name: "fc1"
- type: "kInnerProduct"
- srclayers:"mnist"
- inner_product_param{
- num_output: 2500
- }
- param{
- name: "weight"
- init_method: kUniform
- low:-0.05
- high:0.05
- }
- param{
- name: "bias"
- init_method: kUniform
- low: -0.05
- high:0.05
- }
-}
-
-layer{
- name: "tanh1"
- type:"kTanh"
- srclayers:"fc1"
-}
-layer{
- name: "fc2"
- type: "kInnerProduct"
- srclayers:"tanh1"
- inner_product_param{
- num_output: 2000
- }
- param{
- name: "weight"
- init_method: kUniform
- low:-0.05
- high:0.05
- }
- param{
- name: "bias"
- init_method: kUniform
- low: -0.05
- high:0.05
- }
-}
-
-layer{
- name: "tanh2"
- type:"kTanh"
- srclayers:"fc2"
-}
-layer{
- name: "fc3"
- type: "kInnerProduct"
- srclayers:"tanh2"
- inner_product_param{
- num_output: 1500
- }
- param{
- name: "weight"
- init_method: kUniform
- low:-0.05
- high:0.05
- }
- param{
- name: "bias"
- init_method: kUniform
- low: -0.05
- high:0.05
- }
-
-}
-
-layer{
- name: "tanh3"
- type:"kTanh"
- srclayers:"fc3"
-}
-layer{
- name: "fc4"
- type: "kInnerProduct"
- srclayers:"tanh3"
- inner_product_param{
- num_output: 1000
- }
- param{
- name: "weight"
- init_method: kUniform
- low:-0.05
- high:0.05
- }
- param{
- name: "bias"
- init_method: kUniform
- low: -0.05
- high:0.05
- }
-
-}
-
-layer{
- name: "tanh4"
- type:"kTanh"
- srclayers:"fc4"
-}
-layer{
- name: "fc5"
- type: "kInnerProduct"
- srclayers:"tanh4"
- inner_product_param{
- num_output: 500
- }
- param{
- name: "weight"
- init_method: kUniform
- low:-0.05
- high:0.05
- }
- param{
- name: "bias"
- init_method: kUniform
- low: -0.05
- high:0.05
- }
-
-}
-
-layer{
- name: "tanh5"
- type:"kTanh"
- srclayers:"fc5"
-}
-layer{
- name: "fc6"
- type: "kInnerProduct"
- srclayers:"tanh5"
- inner_product_param{
- num_output: 10
- }
- param{
- name: "weight"
- init_method: kUniform
- low:-0.05
- high:0.05
- }
- param{
- name: "bias"
- init_method: kUniform
- low: -0.05
- high:0.05
- }
-}
-layer{
- name: "loss"
- type:"kSoftmaxLoss"
- softmaxloss_param{
- topk:1
- }
- srclayers:"fc6"
- srclayers:"label"
-}
-}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/mnist/mlp.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/mlp.conf b/examples/mnist/mlp.conf
index 9eeb1c6..3786c4f 100644
--- a/examples/mnist/mlp.conf
+++ b/examples/mnist/mlp.conf
@@ -3,19 +3,22 @@ train_steps: 10000
test_steps:10
test_frequency:60
display_frequency:30
+alg: kBackPropagation
updater{
- base_learning_rate: 0.001
- learning_rate_change_method: kStep
- learning_rate_change_frequency: 60
- gamma: 0.997
- param_type: "Param"
+ base_lr: 0.001
+ lr_change: kStep
+ type: kSGD
+ step_conf{
+ change_freq: 60
+ gamma: 0.997
+ }
}
neuralnet {
layer {
name: "data"
- type: "kShardData"
- data_param {
+ type: kShardData
+ sharddata_conf {
path: "examples/mnist/mnist_train_shard"
batchsize: 1000
}
@@ -24,8 +27,8 @@ layer {
layer {
name: "data"
- type: "kShardData"
- data_param {
+ type: kShardData
+ sharddata_conf {
path: "examples/mnist/mnist_test_shard"
batchsize: 1000
}
@@ -34,9 +37,9 @@ layer {
layer{
name:"mnist"
- type: "kMnistImage"
+ type: kMnist
srclayers: "data"
- mnist_param {
+ mnist_conf {
# sigma: 6
# alpha: 38
# gamma: 15
@@ -52,15 +55,15 @@ layer{
layer{
name: "label"
- type: "kLabel"
+ type: kLabel
srclayers: "data"
}
layer{
name: "fc1"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"mnist"
- inner_product_param{
+ innerproduct_conf{
num_output: 2500
}
param{
@@ -79,14 +82,14 @@ layer{
layer{
name: "tanh1"
- type:"kTanh"
+ type: kTanh
srclayers:"fc1"
}
layer{
name: "fc2"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"tanh1"
- inner_product_param{
+ innerproduct_conf{
num_output: 2000
}
param{
@@ -105,14 +108,14 @@ layer{
layer{
name: "tanh2"
- type:"kTanh"
+ type: kTanh
srclayers:"fc2"
}
layer{
name: "fc3"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"tanh2"
- inner_product_param{
+ innerproduct_conf{
num_output: 1500
}
param{
@@ -132,14 +135,14 @@ layer{
layer{
name: "tanh3"
- type:"kTanh"
+ type: kTanh
srclayers:"fc3"
}
layer{
name: "fc4"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"tanh3"
- inner_product_param{
+ innerproduct_conf{
num_output: 1000
}
param{
@@ -159,14 +162,14 @@ layer{
layer{
name: "tanh4"
- type:"kTanh"
+ type: kTanh
srclayers:"fc4"
}
layer{
name: "fc5"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"tanh4"
- inner_product_param{
+ innerproduct_conf{
num_output: 500
}
param{
@@ -186,14 +189,14 @@ layer{
layer{
name: "tanh5"
- type:"kTanh"
+ type: kTanh
srclayers:"fc5"
}
layer{
name: "fc6"
- type: "kInnerProduct"
+ type: kInnerProduct
srclayers:"tanh5"
- inner_product_param{
+ innerproduct_conf{
num_output: 10
}
param{
@@ -211,8 +214,8 @@ layer{
}
layer{
name: "loss"
- type:"kSoftmaxLoss"
- softmaxloss_param{
+ type:kSoftmaxLoss
+ softmaxloss_conf{
topk:1
}
srclayers:"fc6"
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/neuralnet/base_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/base_layer.h b/include/neuralnet/base_layer.h
index d7c4c3a..8b5b1bf 100644
--- a/include/neuralnet/base_layer.h
+++ b/include/neuralnet/base_layer.h
@@ -12,6 +12,7 @@
#include <thread>
#include "proto/model.pb.h"
+#include "proto/common.pb.h"
#include "utils/param.h"
#include "utils/common.h"
#include "utils/blob.h"
@@ -173,7 +174,7 @@ class Layer {
name_=name;
layer_proto_.set_name(name);
}
- virtual const string type() const {
+ virtual int type() const {
return layer_proto_.type();
}
/**
@@ -404,9 +405,7 @@ class DataLayer: public Layer{
return kNone;
}
- virtual int batchsize() const {
- return layer_proto_.data_param().batchsize();
- }
+ virtual int batchsize() const=0;
virtual const Record& sample() const {
return sample_;
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index bfbee8f..48cffa2 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -168,7 +168,7 @@ class LRNLayer: public Layer {
Blob<float> norm_;
};
-class MnistImageLayer: public ParserLayer {
+class MnistLayer: public ParserLayer {
public:
using Layer::Setup;
@@ -290,6 +290,9 @@ class ShardDataLayer: public DataLayer{
virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){};
virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
+ virtual int batchsize() const {
+ return layer_proto_.sharddata_conf().batchsize();
+ }
private:
shared_ptr<DataShard> shard_;
};
@@ -304,7 +307,9 @@ class LMDBDataLayer: public DataLayer{
virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
void ConvertDatumToSingleLableImageRecord(const Datum& datum,
SingleLabelImageRecord* record);
-
+ virtual int batchsize() const {
+ return layer_proto_.lmdbdata_conf().batchsize();
+ }
private:
MDB_env* mdb_env_;
MDB_dbi mdb_dbi_;
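
With the generic data_param removed, DataLayer::batchsize() becomes pure virtual (see base_layer.h above) and each concrete data layer reads the batch size from its own conf message (sharddata_conf, lmdbdata_conf). Callers that only know the DataLayer interface are unaffected; here is a small sketch of that caller-side pattern, mirroring what MnistLayer::Setup and RGBImageLayer::Setup do (the helper function itself is illustrative):

#include <memory>
#include "neuralnet/base_layer.h"

// Sketch only: the virtual call dispatches to whichever *_conf the concrete
// data layer reads its batch size from.
int SourceBatchsize(const std::shared_ptr<singa::Layer>& src) {
  auto* data = static_cast<singa::DataLayer*>(src.get());
  return data->batchsize();
}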
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/utils/blob.h b/include/utils/blob.h
index 8234b28..97b4ee7 100644
--- a/include/utils/blob.h
+++ b/include/utils/blob.h
@@ -43,7 +43,7 @@
#include <memory>
#include <vector>
#include <glog/logging.h>
-#include "proto/model.pb.h"
+#include "proto/common.pb.h"
using std::shared_ptr;
using std::vector;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/utils/factory.h
----------------------------------------------------------------------
diff --git a/include/utils/factory.h b/include/utils/factory.h
index 3201853..1e3069c 100644
--- a/include/utils/factory.h
+++ b/include/utils/factory.h
@@ -26,7 +26,7 @@ class Factory {
/**
* Register functions to create user defined classes.
* This function is called by the REGISTER_FACTORY macro.
- *
+ *
* @param id Identifier of the creating function/class
* @param func a function that creates a layer instance
*/
@@ -37,19 +37,43 @@ class Factory {
str2func_[id] = func;
}
/**
- * create a layer instance by providing its type
- *
- * @param id The identifier of the layer to be created
+ * Register functions to create user defined classes.
+ * This function is called by the REGISTER_FACTORY macro.
+ *
+ * @param id Identifier of the creating function/class
+ * @param func a function that creates a layer instance
+ */
+ inline void Register(int id,
+ const std::function<T*(void)>& func) {
+ CHECK(id2func_.find(id) == id2func_.end())
+ << "The id has been registered by another function";
+ id2func_[id] = func;
+ }
+
+ /**
+ * create an instance by providing its id
+ *
+ * @param id
*/
inline T* Create(const std::string& id) {
CHECK(str2func_.find(id) != str2func_.end())
<< "The creation function for " << id << " has not been registered";
return str2func_[id]();
}
-
+ /**
+ * create an instance by providing its id
+ *
+ * @param id
+ */
+ inline T* Create(int id) {
+ CHECK(id2func_.find(id) != id2func_.end())
+ << "The creation function for " << id << " has not been registered";
+ return id2func_[id]();
+ }
private:
// Map that stores the registered creation functions
std::map<std::string, std::function<T*(void)>> str2func_;
+ std::map<int, std::function<T*(void)>> id2func_;
};
#endif // SINGA_UTILS_FACTORY_H_
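
Factory now keeps a second map keyed by int, so creation functions can be registered and looked up with the LayerType enum value as well as with a string. Combined with Layer::type() returning int (see base_layer.h above), a layer can be instantiated straight from its proto. A rough usage sketch follows, with a lambda standing in for the CreateInstance macro that the real registration code in neuralnet.cc uses:

#include "neuralnet/layer.h"
#include "proto/model.pb.h"
#include "utils/factory.h"
#include "utils/singleton.h"

namespace singa {
// Sketch only: illustrates the int-keyed Register/Create pair.
void RegisterAndCreateExample(const LayerProto& proto) {
  Factory<Layer>* factory = Singleton<Factory<Layer>>::Instance();
  factory->Register(LayerProto_LayerType_kConvolution,
                    []() -> Layer* { return new ConvolutionLayer(); });
  // proto.type() is the LayerType enum, which converts to int for Create():
  Layer* layer = factory->Create(proto.type());
  (void)layer;  // the real code wires the created layer into the net graph
}
}  // namespace singa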
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/neuralnet/base_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/base_layer.cc b/src/neuralnet/base_layer.cc
index 63ac7a0..95628cb 100644
--- a/src/neuralnet/base_layer.cc
+++ b/src/neuralnet/base_layer.cc
@@ -73,7 +73,7 @@ void BridgeDstLayer::SetupAfterPartition(){
/************* Implementation for ConcateLayer ***********/
void ConcateLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
- size_t concate_dim=proto.concate_param().concate_dimension();
+ size_t concate_dim=proto.concate_conf().concate_dimension();
CHECK_GE(concate_dim,0);
CHECK_GT(srclayers.size(),1);
vector<int> shape=srclayers[0]->data(this).shape();
@@ -131,9 +131,10 @@ void PrefetchLayer::ComputeFeature(Phase phase,
void PrefetchLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
Factory<Layer>* factory=Singleton<Factory<Layer>>::Instance();
- CHECK_GE(proto.sublayers_size(), 1);
+ const auto& sublayers=proto.prefetch_conf().sublayers();
+ CHECK_GE(sublayers.size(), 1);
map<string, SLayer> layers;
- for(auto const &p:proto.sublayers()){
+ for(auto const &p:sublayers){
auto layer=shared_ptr<Layer>(factory->Create(p.type()));
layer->Init(p);
sublayers_.push_back(layer);
@@ -141,7 +142,7 @@ void PrefetchLayer::Setup(const LayerProto& proto,
}
// TODO topology sort layers
auto layer=sublayers_.begin();
- for(auto const &p:proto.sublayers()){
+ for(auto const &p:sublayers){
std::vector<SLayer> src;
for(auto const &srcname: p.srclayers()){
src.push_back(layers[srcname]);
@@ -180,8 +181,8 @@ PrefetchLayer::~PrefetchLayer(){
/************* Implementation for SliceLayer****************/
void SliceLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
- slice_dim_=proto.slice_param().slice_dimension();
- slice_num_=proto.slice_param().slice_num();
+ slice_dim_=proto.slice_conf().slice_dimension();
+ slice_num_=proto.slice_conf().slice_num();
CHECK_GE(slice_dim_,0);
CHECK_EQ(slice_num_, dstlayers_.size());
data_.Reshape(srclayers[0]->data(this).shape());
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index 04ce72a..db13824 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -18,12 +18,12 @@ namespace singa {
void ConvolutionLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
CHECK_EQ(srclayers.size(),1);
- ConvolutionProto conv_param=proto.convolution_param();
- kernel_=conv_param.kernel();
+ ConvolutionProto conv_conf=proto.convolution_conf();
+ kernel_=conv_conf.kernel();
CHECK_GT(kernel_, 0) << "Filter size cannot be zero.";
- pad_=conv_param.pad();
- stride_=conv_param.stride();
- num_filters_=conv_param.num_filters();
+ pad_=conv_conf.pad();
+ stride_=conv_conf.stride();
+ num_filters_=conv_conf.num_filters();
const vector<int>& srcshape=srclayers[0]->data(this).shape();
int dim=srcshape.size();
CHECK_GT(dim, 2);
@@ -55,8 +55,8 @@ void ConvolutionLayer::SetupAfterPartition(const LayerProto& proto,
const vector<int> &shape,
const vector<SLayer>& srclayers){
LayerProto newproto(proto);
- ConvolutionProto *conv_param=newproto.mutable_convolution_param();
- conv_param->set_num_filters(shape[1]);
+ ConvolutionProto *conv_conf=newproto.mutable_convolution_conf();
+ conv_conf->set_num_filters(shape[1]);
Setup(newproto, srclayers);
}
@@ -128,7 +128,7 @@ void DropoutLayer::Setup(const LayerProto& proto,
data_.ReshapeLike(srclayers[0]->data(this));
grad_.ReshapeLike(*srclayers[0]->mutable_grad(this));
mask_.Reshape(srclayers[0]->data(this).shape());
- pdrop_=proto.dropout_param().dropout_ratio();
+ pdrop_=proto.dropout_conf().dropout_ratio();
}
void DropoutLayer::SetupAfterPartition(const LayerProto& proto,
@@ -167,7 +167,7 @@ void InnerProductLayer::Setup(const LayerProto& proto,
const auto& src=srclayers[0]->data(this);
batchsize_=src.shape()[0];
vdim_=src.count()/batchsize_;
- hdim_=proto.inner_product_param().num_output();
+ hdim_=proto.innerproduct_conf().num_output();
data_.Reshape(vector<int>{batchsize_, hdim_});
grad_.ReshapeLike(data_);
Factory<Param>* factory=Singleton<Factory<Param>>::Instance();
@@ -180,7 +180,7 @@ void InnerProductLayer::SetupAfterPartition(const LayerProto& proto,
const vector<int> &shape,
const vector<SLayer>& srclayers){
LayerProto newproto(proto);
- InnerProductProto * innerproto=newproto.mutable_inner_product_param();
+ InnerProductProto * innerproto=newproto.mutable_innerproduct_conf();
innerproto->set_num_output(shape[1]);
Setup(newproto, srclayers);
}
@@ -301,16 +301,16 @@ void LMDBDataLayer::Setup(const LayerProto& proto,
CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed";
CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS); // 1TB
CHECK_EQ(mdb_env_open(mdb_env_,
- proto.data_param().path().c_str(),
+ proto.lmdbdata_conf().path().c_str(),
MDB_RDONLY, 0664), MDB_SUCCESS) << "cannot open lmdb "
- << proto.data_param().path();
+ << proto.lmdbdata_conf().path();
CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS)
<< "mdb_txn_begin failed";
CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS)
<< "mdb_open failed";
CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS)
<< "mdb_cursor_open failed";
- LOG(INFO) << "Opening lmdb " << proto.data_param().path();
+ LOG(INFO) << "Opening lmdb " << proto.lmdbdata_conf().path();
CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST),
MDB_SUCCESS) << "mdb_cursor_get failed";
@@ -324,20 +324,20 @@ void LMDBDataLayer::Setup(const LayerProto& proto,
SingleLabelImageRecord* record=sample_.mutable_image();
ConvertDatumToSingleLableImageRecord(datum, record);
- batchsize_=proto.data_param().batchsize();
+ batchsize_=batchsize();
records_.resize(batchsize_);
- random_skip_=proto.data_param().random_skip();
+ random_skip_=proto.lmdbdata_conf().random_skip();
}
/***************** Implementation for LRNLayer *************************/
void LRNLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
CHECK_EQ(srclayers.size(),1);
- lsize_ = proto.lrn_param().local_size();
+ lsize_ = proto.lrn_conf().local_size();
CHECK_EQ(lsize_ % 2, 1) << "LRN only supports odd values for Localvol";
- knorm_=proto.lrn_param().knorm();
- alpha_ = proto.lrn_param().alpha();
- beta_ = proto.lrn_param().beta();
+ knorm_=proto.lrn_conf().knorm();
+ alpha_ = proto.lrn_conf().alpha();
+ beta_ = proto.lrn_conf().beta();
const vector<int>& s=srclayers[0]->data(this).shape();
data_.Reshape(s);
@@ -381,7 +381,7 @@ void LRNLayer::ComputeGradient(const vector<SLayer>& srclayers) {
/**************** Implementation for MnistImageLayer******************/
-void MnistImageLayer::ParseRecords(Phase phase,
+void MnistLayer::ParseRecords(Phase phase,
const vector<Record>& records, Blob<float>* blob){
LOG_IF(ERROR, records.size()==0)<<"Empty records to parse";
int ndim=records.at(0).image().shape_size();
@@ -448,20 +448,20 @@ void MnistImageLayer::ParseRecords(Phase phase,
}
CHECK_EQ(dptr, blob->mutable_cpu_data()+blob->count());
}
-void MnistImageLayer::Setup(const LayerProto& proto,
+void MnistLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
CHECK_EQ(srclayers.size(),1);
int batchsize=static_cast<DataLayer*>(srclayers[0].get())->batchsize();
Record sample=static_cast<DataLayer*>(srclayers[0].get())->sample();
- kernel_=proto.mnist_param().kernel();
- sigma_=proto.mnist_param().sigma();
- alpha_=proto.mnist_param().alpha();
- beta_=proto.mnist_param().beta();
- gamma_=proto.mnist_param().gamma();
- resize_=proto.mnist_param().resize();
- norm_a_=proto.mnist_param().norm_a();
- norm_b_=proto.mnist_param().norm_b();
- elastic_freq_=proto.mnist_param().elastic_freq();
+ kernel_=proto.mnist_conf().kernel();
+ sigma_=proto.mnist_conf().sigma();
+ alpha_=proto.mnist_conf().alpha();
+ beta_=proto.mnist_conf().beta();
+ gamma_=proto.mnist_conf().gamma();
+ resize_=proto.mnist_conf().resize();
+ norm_a_=proto.mnist_conf().norm_a();
+ norm_b_=proto.mnist_conf().norm_b();
+ elastic_freq_=proto.mnist_conf().elastic_freq();
int ndim=sample.image().shape_size();
CHECK_GE(ndim,2);
@@ -478,11 +478,11 @@ void MnistImageLayer::Setup(const LayerProto& proto,
void PoolingLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
CHECK_EQ(srclayers.size(),1);
- PoolingProto pool_param = proto.pooling_param();
- kernel_=pool_param.kernel();
- stride_=pool_param.stride();
+ PoolingProto pool_conf = proto.pooling_conf();
+ kernel_=pool_conf.kernel();
+ stride_=pool_conf.stride();
CHECK_LT(pad_, kernel_);
- pool_=proto.pooling_param().pool();
+ pool_=proto.pooling_conf().pool();
CHECK(pool_ == PoolingProto_PoolMethod_AVE
|| pool_ == PoolingProto_PoolMethod_MAX)
<< "Padding implemented only for average and max pooling.";
@@ -628,9 +628,9 @@ void RGBImageLayer::ParseRecords(Phase phase,
void RGBImageLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
CHECK_EQ(srclayers.size(),1);
- scale_=proto.rgbimage_param().scale();
- cropsize_=proto.rgbimage_param().cropsize();
- mirror_=proto.rgbimage_param().mirror();
+ scale_=proto.rgbimage_conf().scale();
+ cropsize_=proto.rgbimage_conf().cropsize();
+ mirror_=proto.rgbimage_conf().mirror();
int batchsize=static_cast<DataLayer*>(srclayers[0].get())->batchsize();
Record sample=static_cast<DataLayer*>(srclayers[0].get())->sample();
vector<int> shape;
@@ -645,15 +645,15 @@ void RGBImageLayer::Setup(const LayerProto& proto,
}
data_.Reshape(shape);
mean_.Reshape({shape[1],shape[2],shape[3]});
- if(proto.rgbimage_param().has_meanfile()){
- if(proto.rgbimage_param().meanfile().find("binaryproto")!=string::npos){
+ if(proto.rgbimage_conf().has_meanfile()){
+ if(proto.rgbimage_conf().meanfile().find("binaryproto")!=string::npos){
BlobProto tmp;
- ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp);
+ ReadProtoFromBinaryFile(proto.rgbimage_conf().meanfile().c_str(), &tmp);
CHECK_EQ(mean_.count(), tmp.data_size());
memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size());
}else{
SingleLabelImageRecord tmp;
- ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp);
+ ReadProtoFromBinaryFile(proto.rgbimage_conf().meanfile().c_str(), &tmp);
CHECK_EQ(mean_.count(), tmp.data_size());
memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size());
}
@@ -685,14 +685,14 @@ void ShardDataLayer::ComputeFeature(Phase phase, const vector<SLayer>& srclayers
void ShardDataLayer::Setup(const LayerProto& proto,
const vector<SLayer>& srclayers){
- shard_= std::make_shared<DataShard>(proto.data_param().path(),
+ shard_= std::make_shared<DataShard>(proto.sharddata_conf().path(),
DataShard::kRead);
string key;
shard_->Next(&key, &sample_);
- batchsize_=proto.data_param().batchsize();
+ batchsize_=proto.sharddata_conf().batchsize();
records_.resize(batchsize_);
- random_skip_=proto.data_param().random_skip();
+ random_skip_=proto.sharddata_conf().random_skip();
}
/*******************Implementation of TanLayer***************************/
void TanhLayer::Setup(const LayerProto& proto,
@@ -729,9 +729,9 @@ void SoftmaxLossLayer::Setup(const LayerProto& proto,
data_.Reshape(srclayers[0]->data(this).shape());
batchsize_=data_.shape()[0];
dim_=data_.count()/batchsize_;
- topk_=proto.softmaxloss_param().topk();
+ topk_=proto.softmaxloss_conf().topk();
metric_.Reshape(vector<int>{2});
- scale_=proto.softmaxloss_param().scale();
+ scale_=proto.softmaxloss_conf().scale();
}
void SoftmaxLossLayer::SetupAfterPartition(const LayerProto& proto,
const vector<int> &shape,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index b88d4a6..2240499 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -1,6 +1,7 @@
#include <algorithm>
#include <queue>
+#include "proto/model.pb.h"
#include "neuralnet/neuralnet.h"
#include "utils/singleton.h"
#include "utils/factory.h"
@@ -8,29 +9,33 @@
#include "utils/cluster.h"
namespace singa {
-#define CreateLayer(id) CreateInstance(id, Layer)
+#define LayerT(x) LayerProto_LayerType_k##x
+
+#define RegisterLayer(factory, id) \
+ factory->Register(LayerProto_LayerType_k##id,\
+ CreateInstance(id##Layer, Layer))
void NeuralNet::RegisterLayers(){
Factory<Layer>* factory=Singleton<Factory<Layer>>::Instance();
- factory->Register("kBridgeDst", CreateLayer(BridgeDstLayer));
- factory->Register("kBridgeSrc", CreateLayer(BridgeSrcLayer));
- factory->Register("kConvolution", CreateLayer(ConvolutionLayer));
- factory->Register("kConcate", CreateLayer(ConcateLayer));
- factory->Register("kDropout", CreateLayer(DropoutLayer));
- factory->Register("kInnerProduct", CreateLayer(InnerProductLayer));
- factory->Register("kLabel", CreateLayer(LabelLayer));
- factory->Register("kLMDBData", CreateLayer(LMDBDataLayer));
- factory->Register("kLRN", CreateLayer(LRNLayer));
- factory->Register("kMnistImage", CreateLayer(MnistImageLayer));
- factory->Register("kPooling", CreateLayer(PoolingLayer));
- factory->Register("kPrefetch", CreateLayer(PrefetchLayer));
- factory->Register("kRGBImage", CreateLayer(RGBImageLayer));
- factory->Register("kReLU", CreateLayer(ReLULayer));
- factory->Register("kShardData", CreateLayer(ShardDataLayer));
- factory->Register("kSlice", CreateLayer(SliceLayer));
- factory->Register("kSoftmaxLoss", CreateLayer(SoftmaxLossLayer));
- factory->Register("kSplit", CreateLayer(SplitLayer));
- factory->Register("kTanh", CreateLayer(TanhLayer));
+ RegisterLayer(factory, BridgeDst);
+ RegisterLayer(factory, BridgeSrc);
+ RegisterLayer(factory, Convolution);
+ RegisterLayer(factory, Concate);
+ RegisterLayer(factory, Dropout);
+ RegisterLayer(factory, InnerProduct);
+ RegisterLayer(factory, Label);
+ RegisterLayer(factory, LMDBData);
+ RegisterLayer(factory, LRN);
+ RegisterLayer(factory, Mnist);
+ RegisterLayer(factory, Prefetch);
+ RegisterLayer(factory, Pooling);
+ RegisterLayer(factory, RGBImage);
+ RegisterLayer(factory, ReLU);
+ RegisterLayer(factory, ShardData);
+ RegisterLayer(factory, Slice);
+ RegisterLayer(factory, SoftmaxLoss);
+ RegisterLayer(factory, Split);
+ RegisterLayer(factory, Tanh);
}
shared_ptr<NeuralNet> NeuralNet::SetupNeuralNet(const NetProto& np, Phase phase,
int group_size){
@@ -139,7 +144,7 @@ void NeuralNet::PartitionNeuralNet(){
graph_=CreatePartitonedGraph(layers_, name2layer_);
//DLOG(ERROR)<<"pure graph after partition\n"<<graph_.ToString();
map<string, shared_ptr<Layer>> name2layer(name2layer_);
- map<string, vector<shared_ptr<Layer>>> share_param_layers;
+ map<string, vector<shared_ptr<Layer>>> share_conf_layers;
name2layer_.clear();
layers_.clear();
int gsize=group_size_;
@@ -149,23 +154,25 @@ void NeuralNet::PartitionNeuralNet(){
LayerProto proto;
proto.set_name(node->name());
proto.set_partitionid(node->val().partitionid);
- const string& origin=node->val().origin;
+ string origin=node->val().origin;
if (origin=="kSlice"){
- proto.set_type(origin);
- SliceProto *slice=proto.mutable_slice_param();
+ proto.set_type(LayerT(Slice));
+ SliceProto *slice=proto.mutable_slice_conf();
slice->set_slice_dimension(node->val().slice_dimension);
slice->set_slice_num(node->dstnodes().size());
}else if(origin== "kConcate"){
- proto.set_type(origin);
- ConcateProto *concate=proto.mutable_concate_param();
+ proto.set_type(LayerT(Concate));
+ ConcateProto *concate=proto.mutable_concate_conf();
concate->set_concate_dimension(node->val().concate_dimension);
concate->set_concate_num(node->srcnodes().size());
}else if(origin=="kSplit"){
- proto.set_type(origin);
- SplitProto *split=proto.mutable_split_param();
+ proto.set_type(LayerT(Split));
+ SplitProto *split=proto.mutable_split_conf();
split->set_num_splits(node->dstnodes().size());
- }else if(origin=="kBridgeSrc" || origin== "kBridgeDst"){
- proto.set_type(origin);
+ }else if(origin=="kBridgeSrc"){
+ proto.set_type(LayerT(BridgeSrc));
+ }else if(origin =="kBridgeDst"){
+ proto.set_type(LayerT(BridgeDst));
}else{
CHECK(name2layer.find(node->val().origin)!=name2layer_.end())
<<"Unkown origin for node "<<node->val().origin;
@@ -191,7 +198,7 @@ void NeuralNet::PartitionNeuralNet(){
layer->set_name(node->name());
newlayer=layer;
if(oldlayer->partition_type()==kDataPartition)
- share_param_layers[node->val().origin].push_back(newlayer);
+ share_conf_layers[node->val().origin].push_back(newlayer);
}
newlayer->set_partitionid(node->val().partitionid);
}
@@ -226,15 +233,15 @@ void NeuralNet::PartitionNeuralNet(){
// share Params for layers generated from the same origin layer due to
// data partition
- for(auto & entry: share_param_layers){
+ for(auto & entry: share_conf_layers){
auto layers= entry.second;
auto owner=layers.begin();
- auto owner_params=(*owner)->GetParams();
+ auto owner_confs=(*owner)->GetParams();
for(auto it=owner+1; it!=layers.end();it++){
auto params=(*it)->GetParams();
- CHECK_EQ(params.size(), owner_params.size());
+ CHECK_EQ(params.size(), owner_confs.size());
for(size_t i=0;i<params.size();i++)
- params.at(i)->ShareData(owner_params.at(i));
+ params.at(i)->ShareData(owner_confs.at(i));
}
}
LOG(INFO)<<"network graph after partition layers\n"<<ToString();
@@ -369,7 +376,6 @@ std::string NeuralNet::ToString(){
map<string, string> info;
for(auto layer: layers_){
info[layer->name()]=IntVecToString(layer->shape(nullptr));
- string type=layer->type();
}
return graph_.ToString(info);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/proto/common.proto
----------------------------------------------------------------------
diff --git a/src/proto/common.proto b/src/proto/common.proto
new file mode 100644
index 0000000..6bc0919
--- /dev/null
+++ b/src/proto/common.proto
@@ -0,0 +1,80 @@
+package singa;
+
+enum MsgType {
+ kGet = 0;
+ kPut = 1;
+ kSync = 2;
+ kUpdate = 3;
+ kSyncRequest = 4;
+ kSyncResponse = 5;
+ kStop = 6;
+ kData = 7;
+ kRGet = 8;
+ kRUpdate = 9;
+ kConnect = 10;
+ kMetric = 11;
+};
+
+enum EntityType {
+ kWorkerParam = 0;
+ kWorkerLayer = 1;
+ kServer = 2;
+ kStub = 3;
+ kRuntime = 4;
+};
+
+enum ShareOption {
+ kValueOnly = 0;
+ kWhole = 1;
+};
+
+message BlobProtos {
+ repeated BlobProto blobs = 1;
+ repeated int32 ids = 2;
+ repeated string names = 3;
+}
+
+enum ConnectionType {
+ kOneToOne = 0;
+ kOneToAll = 1;
+}
+
+// to import caffe's lmdb dataset
+message Datum {
+ optional int32 channels = 1;
+ optional int32 height = 2;
+ optional int32 width = 3;
+ // the actual image data, in bytes
+ optional bytes data = 4;
+ optional int32 label = 5;
+ // Optionally, the datum could also hold float data.
+ repeated float float_data = 6;
+ // If true data contains an encoded image that need to be decoded
+ optional bool encoded = 7 [default = false];
+}
+
+message BlobProto {
+ optional int32 num = 1 [default = 0];
+ optional int32 channels = 2 [default = 0];
+ optional int32 height = 3 [default = 0];
+ optional int32 width = 4 [default = 0];
+ repeated float data = 5 [packed = true];
+ repeated float diff = 6 [packed = true];
+}
+
+message Record {
+ enum Type {
+ // each record contains image raw feature and its label.
+ kSingleLabelImage = 0;
+ }
+ optional Type type = 1 [default = kSingleLabelImage];
+ // configuration for
+ optional SingleLabelImageRecord image = 2;
+}
+
+message SingleLabelImageRecord {
+ repeated int32 shape = 1;
+ optional int32 label = 2;
+ optional bytes pixel = 3;
+ repeated float data = 4;
+}
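
common.proto pulls the data-exchange messages (Datum, BlobProto, Record, SingleLabelImageRecord, etc.) out of model.proto, which is why blob.h and the shard creators now include proto/common.pb.h instead of proto/model.pb.h. A minimal sketch (values and function name illustrative) of filling one Record under this schema:

#include "proto/common.pb.h"

// Sketch only: a single-label image record of the kind stored in data shards.
singa::Record MakeExampleRecord() {
  singa::Record rec;
  rec.set_type(singa::Record::kSingleLabelImage);
  singa::SingleLabelImageRecord* img = rec.mutable_image();
  img->add_shape(3);   // channels
  img->add_shape(32);  // height
  img->add_shape(32);  // width
  img->set_label(5);   // class id
  return rec;
}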
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 1e12087..4256491 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -3,76 +3,68 @@ enum Phase {
kTrain = 0;
kValidation = 1;
kTest= 2;
+ // postivie phase for contrastive divergence algorithm
kPositive = 3;
+ // negative phase for contrastive divergence algorithm
kNegative = 4;
}
-enum ShareOption {
- kValueOnly = 0;
- kWhole = 1;
-};
-message ModelProto{
+message ModelProto {
+ // model name, e.g., "cifar10-dcnn", "mnist-mlp"
required string name = 1;
- // start display after this num steps
- optional int32 display_after_steps = 6 [default = 0];
- // frequency of display
- optional int32 display_frequency = 7 [default = 0];
-
- // start validation after this num steps
- optional int32 validation_after_steps = 10 [default = 0];
- // frequency of validation
- optional int32 validation_frequency = 11 [default = 0];
-
- // start test after this num steps
- optional int32 test_after_steps = 13 [default = 0];
- // frequency of test
- optional int32 test_frequency = 14 [default = 0];
- optional int32 checkpoint_after_steps = 15 [default = 0];
- // frequency of test
- optional int32 checkpoint_frequency = 16 [default = 0];
- optional bool prefetch=18[default = true];
+ // frequency of displaying training info
+ required int32 display_frequency = 3 ;
// total num of steps for training
- required int32 train_steps = 20;
- // total num of steps for validation
- optional int32 validation_steps=21 [default=0];
- // total num of steps for test
- optional int32 test_steps=22 [default=0];
- // last snapshot step
- optional int32 step=29;
-
- required UpdaterProto updater=31;
- // There are two basic algorithms for calculating gradients.
- // Different deep learning models use different algorithms.
+ required int32 train_steps = 5;
+ // configuration of SGD updater, including learning rate, etc.
+ required UpdaterProto updater = 7;
enum GradCalcAlg {
+ // BP algorithm for feed-forward models, e.g., CNN, MLP, RNN
kBackPropagation = 1;
+ // CD algorithm for RBM, DBM etc., models
kContrastiveDivergence = 2;
}
- required GradCalcAlg alg= 32 [default = kBackPropagation];
- required NetProto neuralnet = 40;
- optional bool debug=41 [default=false];
- optional int32 warmup_steps=50 [default=0];
+ // gradient calculation algorithm
+ required GradCalcAlg alg = 8 [default = kBackPropagation];
+ required NetProto neuralnet = 9;
+
+ // total num of steps for validation
+ optional int32 validation_steps = 30 [default = 0];
+ // total num of steps for test
+ optional int32 test_steps = 31 [default = 0];
+ // frequency of validation
+ optional int32 validation_frequency = 32;
+ // frequency of test
+ optional int32 test_frequency = 33 [default = 0];
+ // frequency of checkpoint
+ optional int32 checkpoint_frequency = 34 [default = 0];
+ // send parameters to servers after training for this num of steps
+ optional int32 warmup_steps = 35 [default = 0];
+
+ // start display after this num steps
+ optional int32 display_after_steps = 60[default = 0];
+ // start checkpoint after this num steps
+ optional int32 checkpoint_after_steps = 61 [default = 0];
+ // start test after this num steps
+ optional int32 test_after_steps = 62 [default = 0];
+// start validation after this num steps
+ optional int32 validation_after_steps = 63 [default = 0];
+ // last snapshot step
+ optional int32 step = 64 [default = 0];
+ // display debug info
+ optional bool debug = 65 [default = false];
}
message NetProto {
repeated LayerProto layer = 1;
+ // partitioning type for parallelism
optional PartitionType partition_type = 3 [default = kNone];
}
+// weight matrix should be defined before bias vector
message ParamProto {
- // for the program to identify it and share among layers.
- // e.g., "conv1_weight","fc_bias"
- required string name = 1;
- optional int32 id=2;
- // in most situations, user do not need to config this,
- // the program will calculate it
- repeated int32 shape = 3;
- // split the parameter into multiple sub params for serialzation and
- // transferring (Google Protobuf has size limit)
- optional int32 split_threshold = 4 [default = 5000000];
- // partition dimension, -1 for no partition
- optional int32 partition_dim = 5 [default = -1];
- optional int32 owner = 6;
enum InitMethod {
+ // fix the values of all parameters a constant in the value field
kConstant = 0;
// sample gaussian with std and mean
kGaussian = 1;
@@ -87,31 +79,38 @@ message ParamProto {
// a=sqrt(3)/sqrt(fan_in), range is [-a, +a]; no need to set value=sqrt(3),
// the program will multiply it.
kUniformSqrtFanIn = 5;
- // from Theano MLP tutorial, let a=1/sqrt(fan_in+fan_out). for tanh
- // activation, range is [-6a, +6a], for sigmoid activation, range is
- // [-24a, +24a], put the scale factor to value field.
+ // from Theano MLP tutorial, let a=sqrt(6/(fan_in+fan_out)). for tanh
+ // activation, range is [-a, +a], for sigmoid activation, range is
+ // [-4a, +4a], put the scale factor to value field.
// <a href="http://deeplearning.net/tutorial/mlp.html"> Theano MLP</a>
kUniformSqrtFanInOut = 6;
}
- optional InitMethod init_method = 7 [default = kConstant];
+ required InitMethod init_method = 1 [default = kGaussian];
// constant init
- optional float value = 8 [default = 1];
+ optional float value = 5 [default = 1];
// for uniform sampling
- optional float low = 9 [default = -1];
- optional float high = 10 [default = 1];
+ optional float low = 6 [default = -1];
+ optional float high = 7 [default = 1];
// for gaussian sampling
- optional float mean = 11 [default = 0];
- optional float std = 12 [default = 1];
+ optional float mean = 8 [default = 0];
+ optional float std = 9 [default = 1];
// multiplied on the global learning rate.
- optional float learning_rate_multiplier = 13 [default = 1];
+ optional float learning_rate_multiplier = 15 [default = 1];
// multiplied on the global weight decay.
- optional float weight_decay_multiplier = 14 [default = 1];
-}
+ optional float weight_decay_multiplier = 16 [default = 1];
+ // partition dimension, -1 for no partition
+ optional int32 partition_dim = 30 [default = -1];
+ // usually, the program will infer the param shape
+ repeated int32 shape = 31;
-message BlobProtos {
- repeated BlobProto blobs = 1;
- repeated int32 ids = 2;
- repeated string names = 3;
+ // used for identifying the same params from diff models and display deug info
+ optional string name = 61 [default = "param"];
+ // used interally
+ optional int32 id = 62;
+ // parameter slice limit (Google Protobuf also has size limit)
+ optional int32 split_threshold = 63 [default = 5000000];
+ // used internally
+ optional int32 owner = 64 [default = -1];
}
enum PartitionType{
@@ -120,159 +119,212 @@ enum PartitionType{
kNone=2;
}
-enum ConnectionType{
- kOneToOne=0;
- kOneToAll=1;
-}
-
message LayerProto {
- required string name = 1; // the layer name
- required string type = 2; // the layer type from the enum above
- repeated string srclayers=3;
- optional int32 locationid=4 [default=0]; // todo make locationID an array
- optional int32 partitionid=5 [default=0];
- optional PartitionType partition_type=6 [default=kNone];
- optional string datablob=7;
- // can be pos/neg neuron value for CD, neuron value/grad for BP
- //repeated DAryProto ary = 10;
- repeated string share_ary = 11;
+ // the layer name used for identification
+ required string name = 1;
+ enum LayerType{
+ kBridgeSrc = 15;
+ kBridgeDst = 16;
+ kConvolution = 1;
+ kConcate = 2;
+ kShardData = 3;
+ kDropout = 4;
+ kInnerProduct = 5;
+ kLabel = 18;
+ kLMDBData = 17;
+ kLRN = 6;
+ kMnist = 7;
+ kPooling = 8;
+ kPrefetch = 19;
+ kReLU = 9;
+ kRGBImage = 10;
+ kSoftmaxLoss = 11;
+ kSlice = 12;
+ kSplit = 13;
+ kTanh = 14;
+ }
+ // source layer names
+ repeated string srclayers = 3;
// parameters, e.g., weight matrix or bias vector
repeated ParamProto param = 12;
+ // all layers are included in the net structure for training phase by default.
+ // some layers like data layer for loading test data are not used by training
+ // phase should be removed by setting the exclude field.
+ repeated Phase exclude = 15;
+ // the layer type from the enum above
+ required LayerType type = 20;
+ // configuration for convolution layer
+ optional ConvolutionProto convolution_conf = 30;
+ // configuration for concatenation layer
+ optional ConcateProto concate_conf = 31;
+ // configuration for dropout layer
+ optional DropoutProto dropout_conf = 33;
+ // configuration for inner product layer
+ optional InnerProductProto innerproduct_conf = 34;
+ // configuration for local response normalization layer
+ optional DataProto lmdbdata_conf = 35;
+ // configuration for local response normalization layer
+ optional LRNProto lrn_conf = 45;
+ // configuration for mnist parser layer
+ optional MnistProto mnist_conf= 36;
+ // configuration for pooling layer
+ optional PoolingProto pooling_conf = 37;
+ // configuration for prefetch layer
+ optional PrefetchProto prefetch_conf = 44;
+ // configuration for rectified linear unit layer
+ optional ReLUProto relu_conf = 38;
+ // configuration for rgb image parser layer
+ optional RGBImageProto rgbimage_conf = 39;
+ // configuration for data layer
+ optional DataProto sharddata_conf = 32;
+ // configuration for slice layer
+ optional SliceProto slice_conf = 41;
+ // configuration for softmax loss layer
+ optional SoftmaxLossProto softmaxloss_conf = 40;
+ // configuration for split layer
+ optional SplitProto split_conf = 42;
+ // configuration for tanh layer
+ optional TanhProto tanh_conf = 43;
+ // partition type which overrides the partition type for neural net
+ optional PartitionType partition_type = 59;
+ optional string datablob = 58 [default = "unknow"];
+
// names of parameters shared from other layers
- repeated string share_param = 13;
- // All layers are included in the net structure for training phase by default.
- // Layers, e.g., computing performance metrics for test phase, can be excluded
- // by this field which defines in which phase this layer should be excluded.
- repeated Phase exclude = 20;
- // hyper-parameters for layers
- optional ConvolutionProto convolution_param = 21;
- optional ConcateProto concate_param = 31;
- optional DataProto data_param = 22;
- optional DropoutProto dropout_param = 23;
- optional InnerProductProto inner_product_param = 24;
- optional LRNProto lrn_param = 25;
- optional MnistProto mnist_param= 26;
- optional PoolingProto pooling_param = 27;
- optional ReLUProto relu_param = 28;
- optional SoftmaxLossProto softmaxloss_param = 29;
- optional TanhProto tanh_param = 30;
- optional SliceProto slice_param = 32;
- optional SplitProto split_param = 33;
- optional RGBImage rgbimage_param = 34;
- repeated LayerProto sublayers = 35;
+ repeated string share_param = 60;
+ // TODO(wangwei): make location ID an array
+ optional int32 locationid = 61 [default = 0];
+ optional int32 partitionid = 62 [default = 0];
+}
+
+message RGBImageProto {
+ // scale factor for each pixel
+ optional float scale = 1 [default = 1.0];
+ // size after cropping
+ optional int32 cropsize = 2 [default = 0];
+ // mirror the image
+ optional bool mirror = 3 [default = false];
+ // meanfile path
+ optional string meanfile = 4 [default = ""];
}
-message RGBImage {
- optional float scale=1 [default=1.0];
- optional int32 cropsize=2 [default=0];
- optional bool mirror=3 [default=false];
- optional string meanfile=4 [default=""];
+message PrefetchProto{
+ repeated LayerProto sublayers = 1;
}
-message SplitProto{
- required int32 num_splits=1;
+
+message SplitProto {
+ optional int32 num_splits = 1 [default =1];
}
// scaled tan: A*tan(B*x)
message TanhProto {
+ // A of A*tan(B*x)
optional float outer_scale = 1 [default = 1.0];
+ // B of A*tan(B*x)
optional float inner_scale = 2 [default = 1.0];
}
-// Message that stores parameters used by SoftmaxLossProto
message SoftmaxLossProto {
- // accuracy is not comptued by default, unless topk>0;
- // When computing accuracy, count as correct by comparing the true label to
- // the top k scoring classes.
+ // computing accuracy against topk results
optional int32 topk = 1 [default = 1];
- optional float scale= 2 [default = 1];
+ // loss scale factor
+ optional float scale= 30 [default = 1];
}
-// Message that stores parameters used by ConvolutionLayer
message ConvolutionProto {
- required uint32 num_filters = 1; // The number of outputs for the layer
- optional bool bias_term = 2 [default = true]; // whether to have bias terms
- // Pad, kernel size, and stride are all given as a single value for equal
- // dimensions in height and width or as Y, X pairs.
- // The padding size (equal in Y, X)
- optional uint32 pad = 3 [default = 0];
- // The stride (equal in Y, X)
- optional uint32 stride = 4 [default = 1];
- // The kernel height/width
- required uint32 kernel= 5;
+ // The number of outputs for the layer
+ required int32 num_filters = 1;
+ // the kernel height/width
+ required int32 kernel= 2;
+
+ // The padding height/width
+ optional int32 pad = 30 [default = 0];
+ // the stride
+ optional int32 stride = 31 [default = 1];
+ // whether to have bias terms
+ optional bool bias_term = 32 [default = true];
}
-message ConcateProto{
- required int32 concate_dimension=1;
- required int32 concate_num=2;
+message ConcateProto {
+ // on which dimension, starts from 0
+ required int32 concate_dimension = 1;
+ // concatenate offset
+ optional int32 concate_num = 30;
}
-// Message that stores parameters used by DataLayer
message DataProto {
- // path to the data file/folder, absolute or relative to the
- // ClusterProto::workspace
- required string path=2;
- // Specify the batch size.
- required uint32 batchsize = 4;
+ // path to the data file/folder, absolute or relative to the workspace
+ required string path = 2;
+ // batch size.
+ required int32 batchsize = 4;
// skip [0,random_skip] records
- optional uint32 random_skip = 5 [default = 0];
+ optional int32 random_skip = 30 [default = 0];
}
message MnistProto {
+ // normalization x/norm_a
+ required float norm_a = 1 [default = 1];
+ // normalization x-norm_b
+ required float norm_b = 2 [default = 0];
+
// elastic distortion
- optional int32 kernel = 1 [default = 0];
- optional float sigma = 2 [default = 0];
- optional float alpha = 3 [default = 0];
+ optional int32 kernel = 30 [default = 0];
+ optional float sigma = 31 [default = 0];
+ optional float alpha = 32 [default = 0];
// rotation or horizontal shearing
- optional float beta = 4 [default = 0];
+ optional float beta = 33 [default = 0];
// scaling
- optional float gamma = 5 [default = 0];
+ optional float gamma = 34 [default = 0];
// scale to this size as input for deformation
- optional int32 resize = 6 [default = 0] ;
- optional int32 elastic_freq = 7 [default = 0];
- optional float norm_a = 8 [default = 1];
- optional float norm_b = 9 [default = 0];
+ optional int32 resize = 35 [default = 0] ;
+ optional int32 elastic_freq = 36 [default = 0];
}
// Message that stores parameters used by DropoutLayer
message DropoutProto {
- optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
+ // dropout ratio
+ optional float dropout_ratio = 30 [default = 0.5];
}
// Message that stores parameters used by InnerProductLayer
message InnerProductProto {
- required uint32 num_output = 1; // The number of outputs for the layer
- optional bool bias_term = 2 [default = true]; // whether to have bias terms
+ // number of outputs for the layer
+ required int32 num_output = 1;
+ // use bias vector or not
+ optional bool bias_term = 30 [default = true];
}
-// Message that stores parameters used by LRNLayer
message LRNProto {
- optional int32 local_size = 1 [default = 5];
- optional float alpha = 2 [default = 1.];
- optional float beta = 3 [default = 0.75];
+ // local response size
+ required int32 local_size = 1 [default = 5];
+ // scale factor
+ optional float alpha = 31 [default = 1.0];
+ // exponent of the normalization term
+ optional float beta = 32 [default = 0.75];
enum NormRegion {
+ // across channels, e.g., r,g,b
ACROSS_CHANNELS = 0;
+ // within channel, e.g., r, g and b are concatenated into one channel
WITHIN_CHANNEL = 1;
}
- optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
- optional float knorm =5 [default = 1.0];
+ // normalization objective
+ optional NormRegion norm_region = 33 [default = ACROSS_CHANNELS];
+ // offset
+ optional float knorm =34 [default = 1.0];
}
-// Message that stores parameters used by PoolingLayer
message PoolingProto {
+ // The kernel size (square)
+ required int32 kernel= 1;
enum PoolMethod {
MAX = 0;
AVE = 1;
}
// The pooling method
- optional PoolMethod pool = 1 [default = MAX];
- // Pad, kernel size, and stride are all given as a single value for equal
- // dimensions in height and width or as Y, X pairs.
- // The kernel size (square)
- required uint32 kernel= 2;
- // The padding size (equal in Y, X)
- optional uint32 pad = 4 [default = 0];
- // The stride (equal in Y, X)
- optional uint32 stride = 3 [default = 1];
+ optional PoolMethod pool = 30 [default = MAX];
+ // The padding size
+ optional uint32 pad = 31 [default = 0];
+ // The stride
+ optional uint32 stride = 32 [default = 1];
}
message SliceProto{
@@ -280,79 +332,90 @@ message SliceProto{
required int32 slice_num=2;
}
-// Message that stores parameters used by ReLULayer
message ReLUProto {
- // Allow non-zero slope for negative inputs to speed up optimization
- // Described in:
- // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
- // improve neural network acoustic models. In ICML Workshop on Deep Learning
- // for Audio, Speech, and Language Processing.
+ // Ref. Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013).
+ // Rectifier nonlinearities improve neural network acoustic models.
+ // In ICML Workshop on Deep Learning for Audio, Speech, and Language Processing.
optional float negative_slope = 1 [default = 0];
}
-message Record {
- enum Type {
- kSingleLabelImage = 0;
+message UpdaterProto {
+ enum UpdaterType{
+ // normal SGD with momentum and weight decay
+ kSGD = 1;
+ // adaptive subgradient, http://www.magicbroom.info/Papers/DuchiHaSi10.pdf
+ kAdaGrad = 2;
+ // http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
+ kRMSProp = 3;
+ // Nesterov's accelerated (first-order optimal) gradient method
+ kNesterov = 4;
}
- optional Type type = 1 [default = kSingleLabelImage];
- optional SingleLabelImageRecord image = 2;
-}
+ // updater type
+ required UpdaterType type = 1 [default=kSGD];
+ // configuration for RMSProp algorithm
+ optional RMSPropProto rmsprop_conf = 50;
-// to import caffe's lmdb dataset
-message Datum {
- optional int32 channels = 1;
- optional int32 height = 2;
- optional int32 width = 3;
- // the actual image data, in bytes
- optional bytes data = 4;
- optional int32 label = 5;
- // Optionally, the datum could also hold float data.
- repeated float float_data = 6;
- // If true data contains an encoded image that need to be decoded
- optional bool encoded = 7 [default = false];
-}
-
-message SingleLabelImageRecord {
- repeated int32 shape = 1;
- optional int32 label = 2;
- optional bytes pixel = 3;
- repeated float data = 4;
-}
-
-message UpdaterProto {
- optional float momentum = 4 [default = 0];
- optional float weight_decay = 5 [default = 0];
- // used in changing learning rate
- optional float gamma = 6 [default=1];
- optional float pow=7 [default=0];
- optional float delta=8 [default=0.0000001];
- optional float rho=9 [default=0.9];
- optional float base_learning_rate=12 [default=0];
- optional float final_learning_rate=13 [default=0];
- optional int32 learning_rate_change_frequency = 14 [default=0];
- enum ChangeProto {
+ enum ChangeMethod {
kFixed = 0;
- kInverse_t = 1;
+ kInverseT = 1;
kInverse = 2;
kExponential = 3;
kLinear = 4;
kStep = 5;
kFixedStep = 6;
}
- optional ChangeProto learning_rate_change_method = 16 [default = kFixed];
- optional int32 sync_frequency = 17 [default = 1];
- // warmup the parameters and then send to parameter servers.
- optional float moving_rate = 26 [default = 0];
- optional string param_type = 27 [default = "Param"];
+ // change method for learning rate
+ required ChangeMethod lr_change= 2 [default = kFixed];
+
+ optional FixedStepProto fixedstep_conf=40;
+ optional StepProto step_conf=41;
+ optional LinearProto linear_conf=42;
+ optional ExponentialProto exponential_conf=43;
+ optional InverseProto inverse_conf=44;
+ optional InverseTProto inverset_conf=45;
+
+ optional float momentum = 31 [default = 0];
+ optional float weight_decay = 32 [default = 0];
+ // base learning rate
+ optional float base_lr = 34 [default = 0];
+ // small constant to avoid division by zero, i.e. x/(y+delta)
+ optional float delta = 35 [default = 0.00000001];
+}
+
+message RMSPropProto{
+ // history=history*rho_+(1-rho_)*(grad*grad_scale);
+ required float rho = 1;
+}
+
+message FixedStepProto{
repeated int32 step = 28;
+ // lr = step_lr[i] if current step >= step[i]
repeated float step_lr = 29;
}
-message BlobProto {
- optional int32 num = 1 [default = 0];
- optional int32 channels = 2 [default = 0];
- optional int32 height = 3 [default = 0];
- optional int32 width = 4 [default = 0];
- repeated float data = 5 [packed = true];
- repeated float diff = 6 [packed = true];
+message StepProto{
+ // lr = base_lr * gamma^(step/change_freq)
+ required float gamma = 35 [default = 1];
+ // lr = base_lr * gamma^(step/change_freq)
+ required int32 change_freq= 40;
+}
+message LinearProto{
+ // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
+ required int32 change_freq= 40;
+ // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
+ required float final_lr = 39;
+}
+message ExponentialProto{
+ // lr = base_lr / 2^(step/change_freq)
+ required int32 change_freq= 40;
+}
+message InverseTProto{
+ // lr = base_lr / (1+step/final_lr)
+ required float final_lr = 39;
+}
+message InverseProto{
+ // lr = base_lr*(1+gamma*step)^(-pow)
+ required float gamma = 1 [default = 1];
+ // lr = base_lr*(1+gamma*step)^(-pow)
+ required float pow = 2 [default = 0];
}
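
The refactoring above moves each learning-rate schedule into its own sub-message (fixedstep_conf, step_conf, linear_conf, ...) selected by lr_change, so the updater only reads the fields that belong to the chosen schedule. A minimal sketch of how the new UpdaterProto could be filled in from C++ through the generated protobuf API follows; the header path and the numeric values are illustrative assumptions, while the field and enum names are taken from the messages defined in this diff.

    #include "proto/model.pb.h"  // assumed path to the generated header

    // Sketch: configure SGD with a fixed-step learning-rate schedule.
    singa::UpdaterProto MakeFixedStepSGD() {
      singa::UpdaterProto updater;
      updater.set_type(singa::UpdaterProto_UpdaterType_kSGD);
      updater.set_lr_change(singa::UpdaterProto_ChangeMethod_kFixedStep);
      updater.set_momentum(0.9f);         // illustrative values
      updater.set_weight_decay(0.0005f);
      // lr follows step_lr[i] once the current step passes step[i]
      singa::FixedStepProto* fs = updater.mutable_fixedstep_conf();
      fs->add_step(0);      fs->add_step_lr(0.01f);
      fs->add_step(10000);  fs->add_step_lr(0.001f);
      fs->add_step(15000);  fs->add_step_lr(0.0001f);
      return updater;
    }

Keeping the per-schedule parameters in their own messages is what lets GetLearningRate in updater.cc below switch on lr_change and read, for example, fixedstep_conf().step_lr(i) without unrelated gamma/pow/final_lr fields crowding UpdaterProto.
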
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index 11499db..2a89de2 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -3,6 +3,7 @@
#include <map>
#include <queue>
#include <glog/logging.h>
+#include "proto/common.pb.h"
#include "trainer/trainer.h"
#include "mshadow/tensor.h"
using std::vector;
@@ -276,6 +277,8 @@ void Trainer::Start(const ModelProto& mproto, const ClusterProto& cproto,
Run(workers.size(), servers.size());
for(auto& thread: threads)
thread.join();
+ for(auto x: ctx)
+ delete x;
}
void Trainer::Run(int nworkers, int nservers){
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index ae7d582..80e3619 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -9,45 +9,44 @@ using namespace mshadow::expr;
namespace singa {
float Updater::GetLearningRate(int step){
- float ret = 0., r = 0., base=proto_.base_learning_rate();
+ float ret = 0., r = 0., base=proto_.base_lr();
int freq=0;
- switch (proto_.learning_rate_change_method()) {
- case UpdaterProto_ChangeProto_kFixed:
+ switch (proto_.lr_change()) {
+ case UpdaterProto_ChangeMethod_kFixed:
ret = base;
break;
- case UpdaterProto_ChangeProto_kLinear:
+ case UpdaterProto_ChangeMethod_kLinear:
// a is init, b is the final
- freq=proto_.learning_rate_change_frequency();
+ freq=proto_.linear_conf().change_freq();
r = step * 1.0 / freq;
- ret = (1.0 - r) * base + r * proto_.final_learning_rate();
+ ret = (1.0 - r) * base + r * proto_.linear_conf().final_lr();
break;
- case UpdaterProto_ChangeProto_kExponential:
+ case UpdaterProto_ChangeMethod_kExponential:
// a is init, b is the final, from convnet
- CHECK_EQ(base, 2 * proto_.final_learning_rate())
- << "final value should be the half";
- freq=proto_.learning_rate_change_frequency();
+ freq=proto_.exponential_conf().change_freq();
ret = base / pow(2, step * 1. / freq);
break;
- case UpdaterProto_ChangeProto_kInverse_t:
+ case UpdaterProto_ChangeMethod_kInverseT:
// a is init, b is the final, from convnet
- CHECK_EQ(base, 2 * proto_.final_learning_rate())
+ CHECK_EQ(base, 2 * proto_.inverset_conf().final_lr())
<< "final value should be the half";
- ret = base / (1. + step * 1. / proto_.final_learning_rate());
+ ret = base / (1. + step * 1. / proto_.inverset_conf().final_lr());
break;
- case UpdaterProto_ChangeProto_kInverse:
+ case UpdaterProto_ChangeMethod_kInverse:
// a is init, b is gamma, c is pow
- ret=base*pow(1.f+proto_.gamma()*step, -proto_.pow());
+ ret=base*pow(1.f+proto_.inverse_conf().gamma()*step,
+ -proto_.inverse_conf().pow());
break;
- case UpdaterProto_ChangeProto_kStep:
+ case UpdaterProto_ChangeMethod_kStep:
// a is the base learning rate, b is gamma, from caffe
// notice it is step/change_steps, not step*1.0/change_steps
- freq=proto_.learning_rate_change_frequency();
- ret = base * pow(proto_.gamma(), step / freq);
+ freq=proto_.step_conf().change_freq();
+ ret = base * pow(proto_.step_conf().gamma(), step / freq);
break;
- case UpdaterProto_ChangeProto_kFixedStep:
- for(int i=0;i<proto_.step_size();i++){
- if(step>proto_.step(i))
- ret=proto_.step_lr(i);
+ case UpdaterProto_ChangeMethod_kFixedStep:
+ for(int i=0;i<proto_.fixedstep_conf().step_size();i++){
+ if(step>proto_.fixedstep_conf().step(i))
+ ret=proto_.fixedstep_conf().step_lr(i);
}
break;
default:
@@ -59,7 +58,7 @@ float Updater::GetLearningRate(int step){
/***********************SGD with momentum******************************/
void SGDUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
- base_lr_=proto.base_learning_rate();
+ base_lr_=proto.base_lr();
//CHECK_GT(base_lr_, 0);
momentum_=proto.momentum();
weight_decay_=proto.weight_decay();
@@ -88,7 +87,7 @@ void SGDUpdater::Update(int step, shared_ptr<Param> param, float grad_scale){
/***********************Nesterov******************************/
void NesterovUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
- base_lr_=proto.base_learning_rate();
+ base_lr_=proto.base_lr();
CHECK_GT(base_lr_, 0);
weight_decay_=proto.weight_decay();
}
@@ -113,7 +112,7 @@ void NesterovUpdater::Update(int step, shared_ptr<Param> param, float grad_scale
/***********************AdaGrad******************************/
void AdaGradUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
- base_lr_=proto.base_learning_rate();
+ base_lr_=proto.base_lr();
CHECK_GT(base_lr_, 0);
delta_=proto.delta();
weight_decay_=proto.weight_decay();
@@ -137,10 +136,10 @@ void AdaGradUpdater::Update(int step, shared_ptr<Param> param, float grad_scale)
/***********************RMSProp******************************/
void RMSPropUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
- base_lr_=proto.base_learning_rate();
+ base_lr_=proto.base_lr();
CHECK_GT(base_lr_, 0);
delta_=proto.delta();
- rho_=proto.rho();
+ rho_=proto.rmsprop_conf().rho();
weight_decay_=proto.weight_decay();
}
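
RMSPropUpdater::Init now reads rho from the nested rmsprop_conf, matching the history update documented in RMSPropProto. The Update() body is not part of this diff; as a rough standalone illustration, the textbook RMSProp step that those comments point at looks like the sketch below. Note that the RMSPropProto comment omits the square on the scaled gradient, and whether delta sits inside or outside the square root is an assumption here; the standard form squares the scaled gradient.

    #include <cmath>
    #include <vector>

    // Sketch of the textbook RMSProp step (not SINGA's actual Update()):
    //   history = history*rho + (1-rho)*(grad*grad_scale)^2
    //   param  -= lr * grad*grad_scale / (sqrt(history) + delta)
    void RMSPropStep(std::vector<float>* param,
                     const std::vector<float>& grad,
                     std::vector<float>* history,
                     float rho, float delta, float lr,
                     float grad_scale = 1.0f) {
      for (size_t i = 0; i < param->size(); ++i) {
        float g = grad[i] * grad_scale;
        (*history)[i] = (*history)[i] * rho + (1.0f - rho) * g * g;
        (*param)[i] -= lr * g / (std::sqrt((*history)[i]) + delta);
      }
    }
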