Posted to commits@singa.apache.org by wa...@apache.org on 2015/07/23 08:49:29 UTC

[1/3] incubator-singa git commit: SINGA-36 Clean ModelProto, ClusterProto, JobProto and driver program

Repository: incubator-singa
Updated Branches:
  refs/heads/master 29de86337 -> c3a248a4b


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
new file mode 100644
index 0000000..7c462d2
--- /dev/null
+++ b/src/proto/job.proto
@@ -0,0 +1,461 @@
+package singa;
+
+message JobProto {
+  required ClusterProto cluster = 1;
+  required ModelProto model = 2;
+}
+
+message ClusterProto {
+  optional int32 nworker_groups = 1;
+  optional int32 nserver_groups = 2;
+  optional int32 nworkers_per_group = 3 [default = 1];
+  optional int32 nservers_per_group = 4 [default = 1];
+  optional int32 nworkers_per_procs = 5 [default = 1];
+  optional int32 nservers_per_procs = 6 [default = 1];
+
+  // servers and workers in different processes?
+  optional bool server_worker_separate = 11 [default = false];
+
+  // port number is used by ZeroMQ
+  optional int32 start_port = 13 [default = 6723];
+  // local workspace, train/val/test shards, checkpoint files
+  optional string workspace = 14 [default = "workspace"];
+
+  // conduct updates at server side; otherwise do it at worker side
+  optional bool server_update = 40 [default = true];
+  // share memory space between worker groups in one procs
+  optional bool share_memory = 41 [default = true];
+
+  // bandwidth of ethernet, Bytes per second, default is 1 Gbps
+  optional int32 bandwidth = 50 [default = 134217728];
+  // poll time in milliseconds
+  optional int32 poll_time = 51 [default = 100];
+}
+
+
+enum Phase {
+  kTrain = 0;
+  kValidation = 1;
+  kTest= 2;
+  // positive phase for contrastive divergence algorithm
+  kPositive = 3;
+  // negative phase for contrastive divergence algorithm
+  kNegative = 4;
+  kForward = 5;
+  kBackward = 6;
+}
+
+message ModelProto {
+  // model name, e.g., "cifar10-dcnn", "mnist-mlp"
+  required string name = 1;
+  // frequency of displaying training info
+  required int32 display_frequency = 3;
+  // total num of steps for training
+  required int32 train_steps = 5;
+  // configuration of SGD updater, including learning rate, etc.
+  required UpdaterProto updater = 7;
+  enum GradCalcAlg {
+    // BP algorithm for feed-forward models, e.g., CNN, MLP, RNN
+    kBackPropagation = 1;
+    // CD algorithm for RBM, DBM etc., models
+    kContrastiveDivergence = 2;
+  }
+  // gradient calculation algorithm
+  required GradCalcAlg alg = 8 [default = kBackPropagation];
+  required NetProto neuralnet = 9;
+
+  // total num of steps for validation
+  optional int32 validation_steps = 30 [default = 0];
+  // total num of steps for test
+  optional int32 test_steps = 31 [default = 0];
+  // frequency of validation
+  optional int32 validation_frequency = 32;
+  // frequency of test
+  optional int32 test_frequency = 33 [default = 0];
+  // frequency of checkpoint
+  optional int32 checkpoint_frequency = 34 [default = 0];
+  // send parameters to servers after training for this num of steps
+  optional int32 warmup_steps = 35 [default = 0];
+  // checkpoint path
+  optional bool resume = 36 [default = false];
+
+  // start display after this num steps
+  optional int32 display_after = 60 [default = 0];
+  // start checkpoint after this num steps
+  optional int32 checkpoint_after = 61 [default = 0];
+  // start test after this num steps
+  optional int32 test_after = 62 [default = 0];
+  // start validation after this num steps
+  optional int32 validation_after = 63 [default = 0];
+  // last snapshot step
+  optional int32 step = 64 [default = 0];
+  // display debug info
+  optional bool debug = 65 [default = false];
+  // checkpoint files
+  repeated string checkpoint = 66;
+  // reset the version of params loaded from checkpoint file to step
+  optional bool reset_param_version = 67 [default = false];
+}
+
+message NetProto {
+  repeated LayerProto layer = 1;
+  // partitioning type for parallelism
+  optional int32 partition_dim = 2 [default = 0];
+}
+
+// weight matrix should be defined before bias vector
+message ParamProto {
+  enum InitMethod {
+    // fix the values of all parameters to a constant given in the value field
+    kConstant = 0;
+    // sample gaussian with std and mean
+    kGaussian = 1;
+    // uniform sampling between low and high
+    kUniform = 2;
+    // copy the content and history from a previous training run
+    kPretrained = 3;
+    // from Toronto Convnet, let a=1/sqrt(fan_in), w*=a after generating from
+    // Gaussian distribution
+    kGaussainSqrtFanIn = 4;
+    // from Toronto Convnet, rectified linear activation, let
+    // a=sqrt(3)/sqrt(fan_in), range is [-a, +a]; no need to set value=sqrt(3),
+    // the program will multiply it.
+    kUniformSqrtFanIn = 5;
+    // from Theano MLP tutorial, let a=sqrt(6/(fan_in+fan_out)). for tanh
+    // activation, range is [-a, +a], for sigmoid activation, range is
+    // [-4a, +4a]; put the scale factor in the value field.
+    // <a href="http://deeplearning.net/tutorial/mlp.html"> Theano MLP</a>
+    kUniformSqrtFanInOut = 6;
+  }
+  optional InitMethod init_method = 1 [default = kGaussian];
+  // constant init
+  optional float value = 5 [default = 1];
+  // for uniform sampling
+  optional float low = 6 [default = -1];
+  optional float high = 7 [default = 1];
+  // for gaussian sampling
+  optional float mean = 8 [default = 0];
+  optional float std = 9 [default = 1];
+  // multiplied on the global learning rate.
+  optional float learning_rate_multiplier = 15 [default = 1];
+  // multiplied on the global weight decay.
+  optional float weight_decay_multiplier = 16 [default = 1];
+  // partition dimension, -1 for no partition
+  optional int32 partition_dim = 30;
+  // usually, the program will infer the param shape
+  repeated int32 shape = 31;
+  // used for identifying the same params from different models and for displaying debug info
+  optional string name = 61 [default = ""];
+  // name of the owner param from which this param shares the values
+  optional string share_from = 62;
+  // used internally
+  optional int32 id = 63;
+  // parameter slice limit (Google Protobuf also has size limit)
+  optional int32 split_threshold = 64 [default = 5000000];
+  // used internally
+  optional int32 owner = 65 [default = -1];
+}
+
+enum PartitionType{
+  kDataPartition=0;
+  kLayerPartition=1;
+  kNone=2;
+}
+
+message LayerProto {
+  // the layer name used for identification
+  required string name = 1;
+  enum LayerType{
+    kBridgeSrc = 15;
+    kBridgeDst = 16;
+    kConvolution = 1;
+    kConcate = 2;
+    kShardData = 3;
+    kDropout = 4;
+    kInnerProduct = 5;
+    kLabel = 18;
+    kLMDBData = 17;
+    kLRN = 6;
+    kMnist = 7;
+    kPooling = 8;
+    kPrefetch = 19;
+    kReLU = 9;
+    kRGBImage = 10;
+    kSoftmaxLoss = 11;
+    kSlice = 12;
+    kSplit = 13;
+    kTanh = 14;
+  }
+  // source layer names
+  repeated string srclayers = 3;
+  // parameters, e.g., weight matrix or bias vector
+  repeated ParamProto param = 12;
+  // all layers are included in the net structure for the training phase by default.
+  // layers that are not used by the training phase, e.g., a data layer for
+  // loading test data, should be removed by setting the exclude field.
+  repeated Phase exclude = 15;
+  // the layer type from the enum above
+  required LayerType type = 20;
+  // configuration for convolution layer
+  optional ConvolutionProto convolution_conf = 30;
+  // configuration for concatenation layer
+  optional ConcateProto concate_conf = 31;
+  // configuration for dropout layer
+  optional DropoutProto dropout_conf = 33;
+  // configuration for inner product layer
+  optional InnerProductProto innerproduct_conf = 34;
+  // configuration for lmdb data layer
+  optional DataProto lmdbdata_conf = 35;
+  // configuration for local response normalization layer
+  optional LRNProto lrn_conf = 45;
+  // configuration for mnist parser layer
+  optional MnistProto mnist_conf= 36;
+  // configuration for pooling layer
+  optional PoolingProto pooling_conf = 37;
+  // configuration for prefetch layer
+  optional PrefetchProto prefetch_conf = 44;
+  // configuration for rectified linear unit layer
+  optional ReLUProto relu_conf = 38;
+  // configuration for rgb image parser layer
+  optional RGBImageProto rgbimage_conf = 39;
+  // configuration for data layer
+  optional DataProto sharddata_conf = 32;
+  // configuration for slice layer
+  optional SliceProto slice_conf = 41;
+  // configuration for softmax loss layer
+  optional SoftmaxLossProto softmaxloss_conf = 40;
+  // configuration for split layer
+  optional SplitProto split_conf = 42;
+  // configuration for tanh layer
+  optional TanhProto tanh_conf = 43;
+
+
+  // overrides the partition dimension for neural net
+  optional int32 partition_dim =59 [default = -1];
+  optional string datablob = 58 [default = "unknow"];
+
+  // names of parameters shared from other layers
+  repeated string share_param = 60;
+  optional int32 partition_id = 62 [default = 0];
+}
+
+message RGBImageProto {
+  // scale factor for each pixel
+  optional float scale = 1 [default = 1.0];
+  // size after cropping
+  optional int32 cropsize = 2 [default = 0];
+  // mirror the image
+  optional bool mirror = 3 [default = false];
+  // meanfile path
+  optional string meanfile = 4 [default = ""];
+}
+
+message PrefetchProto {
+  repeated LayerProto sublayers = 1;
+}
+
+message SplitProto {
+  optional int32 num_splits = 1 [default =1];
+}
+
+// scaled tanh: A*tanh(B*x)
+message TanhProto {
+  // A of A*tanh(B*x)
+  optional float outer_scale = 1 [default = 1.0];
+  // B of A*tanh(B*x)
+  optional float inner_scale = 2 [default = 1.0];
+}
+
+message SoftmaxLossProto {
+  // computing accuracy against topk results
+  optional int32 topk = 1 [default = 1];
+  // loss scale factor
+  optional float scale= 30 [default = 1];
+}
+
+message ConvolutionProto {
+  // The number of outputs for the layer
+  required int32 num_filters = 1;
+  // the kernel height/width
+  required int32 kernel= 2;
+
+  // The padding height/width
+  optional int32 pad = 30 [default = 0];
+  // the stride
+  optional int32 stride = 31 [default = 1];
+  // whether to have bias terms
+  optional bool bias_term = 32 [default = true];
+}
+
+message ConcateProto {
+  // on which dimension, starts from 0
+  required int32 concate_dim = 1;
+}
+
+message DataProto {
+  // path to the data file/folder, absolute or relative to the workspace
+  required string path = 2;
+  // batch size.
+  required int32 batchsize = 4;
+  // skip [0,random_skip] records
+  optional int32 random_skip = 30 [default = 0];
+}
+
+message MnistProto {
+  // normalization x/norm_a
+  required float norm_a = 1 [default = 1];
+  // normalization x-norm_b
+  required float norm_b = 2 [default = 0];
+
+  // elastic distortion
+  optional int32 kernel = 30 [default = 0];
+  optional float sigma = 31 [default = 0];
+  optional float alpha = 32 [default = 0];
+  // rotation or horizontal shearing
+  optional float beta = 33 [default = 0];
+  // scaling
+  optional float gamma = 34 [default = 0];
+  // scale to this size as input for deformation
+  optional int32 resize = 35 [default = 0] ;
+  optional int32 elastic_freq = 36 [default = 0];
+}
+
+// Message that stores parameters used by DropoutLayer
+message DropoutProto {
+  // dropout ratio
+  optional float dropout_ratio = 30 [default = 0.5];
+}
+
+// Message that stores parameters used by InnerProductLayer
+message InnerProductProto {
+  // number of outputs for the layer
+  required int32 num_output = 1;
+  // use bias vector or not
+  optional bool bias_term = 30 [default = true];
+}
+
+message LRNProto {
+  // local response size
+  required int32 local_size = 1 [default = 5];
+  // scale factor
+  optional float alpha = 31 [default = 1.0];
+  // exponential number
+  optional float beta = 32 [default = 0.75];
+  enum NormRegion {
+    // across channels, e.g., r,g,b
+    ACROSS_CHANNELS = 0;
+    // within channel, e.g., r, g and b are concatenated into one channel
+    WITHIN_CHANNEL = 1;
+  }
+  // normalization objective
+  optional NormRegion norm_region = 33 [default = ACROSS_CHANNELS];
+  // offset
+  optional float knorm =34 [default = 1.0];
+}
+
+message PoolingProto {
+  // The kernel size (square)
+  required int32 kernel= 1;
+  enum PoolMethod {
+    MAX = 0;
+    AVE = 1;
+  }
+  // The pooling method
+  optional PoolMethod pool = 30 [default = MAX];
+  // The padding size
+  optional uint32 pad = 31 [default = 0];
+  // The stride
+  optional uint32 stride = 32 [default = 1];
+}
+
+message SliceProto{
+  required int32 slice_dim = 1;
+}
+
+message ReLUProto {
+  // Ref. Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013).
+  // Rectifier nonlinearities improve neural network acoustic models.
+  // In ICML Workshop on Deep Learning for Audio, Speech, and Language Processing.
+  optional float negative_slope = 1 [default = 0];
+}
+
+message UpdaterProto {
+  enum UpdaterType{
+    // normal SGD with momentum and weight decay
+    kSGD = 1;
+    // adaptive subgradient, http://www.magicbroom.info/Papers/DuchiHaSi10.pdf
+    kAdaGrad = 2;
+    // http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
+    kRMSProp = 3;
+    // Nesterov first optimal gradient method
+    kNesterov = 4;
+  }
+  // updater type
+  required UpdaterType type = 1 [default=kSGD];
+  // configuration for RMSProp algorithm
+  optional RMSPropProto rmsprop_conf = 50;
+
+  enum ChangeMethod {
+    kFixed = 0;
+    kInverseT = 1;
+    kInverse = 2;
+    kExponential = 3;
+    kLinear = 4;
+    kStep = 5;
+    kFixedStep = 6;
+  }
+  // change method for learning rate
+  required ChangeMethod lr_change= 2 [default = kFixed];
+
+  optional FixedStepProto fixedstep_conf=40;
+  optional StepProto step_conf=41;
+  optional LinearProto linear_conf=42;
+  optional ExponentialProto exponential_conf=43;
+  optional InverseProto inverse_conf=44;
+  optional InverseTProto inverset_conf=45;
+
+  optional float momentum = 31 [default = 0];
+  optional float weight_decay = 32 [default = 0];
+  // base learning rate
+  optional float base_lr = 34 [default = 0];
+  // used to avoid divide by 0, i.e. x/(y+delta)
+  optional float delta = 35 [default = 0.00000001];
+}
+
+message RMSPropProto{
+  // history=history*rho_+(1-rho_)*(grad*grad_scale);
+  required float rho = 1;
+}
+
+message FixedStepProto{
+  repeated int32 step = 28;
+  // lr = step_lr[i] if current step >= step[i]
+  repeated float step_lr = 29;
+}
+
+message StepProto{
+  // lr = base_lr * gamma^(step/change_freq)
+  required float gamma = 35 [default = 1];
+  // lr = base_lr * gamma^(step/change_freq)
+  required int32 change_freq= 40;
+}
+message LinearProto{
+  // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
+  required int32 change_freq= 40;
+  // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
+  required float final_lr = 39;
+}
+message ExponentialProto{
+  // lr = base_lr / 2^(step/change_freq)
+  required int32 change_freq= 40;
+}
+message InverseTProto{
+  // lr = base_lr / (1+step/final_lr)
+  required float final_lr = 39;
+}
+message InverseProto{
+  // lr = base_lr*(1+gamma*step)^(-pow)
+  required float gamma = 1 [default = 1];
+  // lr = base_lr*(1+gamma*step)^(-pow)
+  required float pow = 2 [default = 0];
+}
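
For illustration: the learning-rate formulas in the comments above fully
determine each ChangeMethod. Below is a minimal C++ sketch (not part of this
commit) that evaluates them from an UpdaterProto; it assumes only the header
generated from job.proto and a staircase (integer-division) reading of
step/change_freq.

  #include <cmath>
  #include "proto/job.pb.h"

  // Evaluate the learning rate at a given step, following the formulas
  // in the job.proto comments above.
  float GetLearningRate(const singa::UpdaterProto& conf, int step) {
    float lr = conf.base_lr();
    switch (conf.lr_change()) {
      case singa::UpdaterProto::kFixed:
        return lr;
      case singa::UpdaterProto::kStep:  // lr = base_lr * gamma^(step/change_freq)
        return lr * std::pow(conf.step_conf().gamma(),
                             step / conf.step_conf().change_freq());
      case singa::UpdaterProto::kLinear: {
        // lr = (1 - step/freq) * base_lr + (step/freq) * final_lr
        float r = static_cast<float>(step) / conf.linear_conf().change_freq();
        return (1.0f - r) * lr + r * conf.linear_conf().final_lr();
      }
      case singa::UpdaterProto::kExponential:  // lr = base_lr / 2^(step/change_freq)
        return lr / std::pow(2.0f, step / conf.exponential_conf().change_freq());
      case singa::UpdaterProto::kInverseT:  // lr = base_lr / (1 + step/final_lr)
        return lr / (1.0f + step / conf.inverset_conf().final_lr());
      case singa::UpdaterProto::kInverse:  // lr = base_lr * (1+gamma*step)^(-pow)
        return lr * std::pow(1.0f + conf.inverse_conf().gamma() * step,
                             -conf.inverse_conf().pow());
      case singa::UpdaterProto::kFixedStep: {  // lr = step_lr[i] if step >= step[i]
        // assumes the step[] entries are listed in increasing order
        for (int i = 0; i < conf.fixedstep_conf().step_size(); i++)
          if (step >= conf.fixedstep_conf().step(i))
            lr = conf.fixedstep_conf().step_lr(i);
        return lr;
      }
    }
    return lr;
  }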

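A companion sketch that populates just the required fields of the merged
JobProto and prints it in the text format used by job.conf files. The layer
names, batch size, shard path, and init range below are hypothetical.

  #include <iostream>
  #include <string>
  #include <google/protobuf/text_format.h>
  #include "proto/job.pb.h"

  int main() {
    singa::JobProto job;
    job.mutable_cluster();  // every ClusterProto field has a usable default

    singa::ModelProto* model = job.mutable_model();
    model->set_name("mnist-mlp");  // e.g., from the ModelProto comment above
    model->set_display_frequency(100);
    model->set_train_steps(1000);
    model->set_alg(singa::ModelProto::kBackPropagation);
    singa::UpdaterProto* updater = model->mutable_updater();
    updater->set_type(singa::UpdaterProto::kSGD);
    updater->set_lr_change(singa::UpdaterProto::kFixed);
    updater->set_base_lr(0.01f);

    // A hypothetical two-layer net: a data layer feeding an inner-product layer.
    singa::NetProto* net = model->mutable_neuralnet();
    singa::LayerProto* data = net->add_layer();
    data->set_name("data");
    data->set_type(singa::LayerProto::kShardData);
    data->mutable_sharddata_conf()->set_path("train_shard");
    data->mutable_sharddata_conf()->set_batchsize(64);
    singa::LayerProto* fc = net->add_layer();
    fc->set_name("fc1");
    fc->set_type(singa::LayerProto::kInnerProduct);
    fc->add_srclayers("data");
    fc->mutable_innerproduct_conf()->set_num_output(10);
    singa::ParamProto* w = fc->add_param();  // uniform init in [-0.05, 0.05]
    w->set_init_method(singa::ParamProto::kUniform);
    w->set_low(-0.05f);
    w->set_high(0.05f);

    std::string text;
    google::protobuf::TextFormat::PrintToString(job, &text);
    std::cout << text;  // job.conf-style text format
    return 0;
  }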
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
deleted file mode 100644
index f3b8dfe..0000000
--- a/src/proto/model.proto
+++ /dev/null
@@ -1,427 +0,0 @@
-package singa;
-enum Phase {
-  kTrain = 0;
-  kValidation = 1;
-  kTest= 2;
-  // positive phase for contrastive divergence algorithm
-  kPositive = 3;
-  // negative phase for contrastive divergence algorithm
-  kNegative = 4;
-  kForward = 5;
-  kBackward = 6;
-}
-
-message ModelProto {
-  // model name, e.g., "cifar10-dcnn", "mnist-mlp"
-  required string name = 1;
-  // frequency of displaying training info
-  required int32 display_frequency = 3;
-  // total num of steps for training
-  required int32 train_steps = 5;
-  // configuration of SGD updater, including learning rate, etc.
-  required UpdaterProto updater = 7;
-  enum GradCalcAlg {
-    // BP algorithm for feed-forward models, e.g., CNN, MLP, RNN
-    kBackPropagation = 1;
-    // CD algorithm for RBM, DBM etc., models
-    kContrastiveDivergence = 2;
-  }
-  // gradient calculation algorithm
-  required GradCalcAlg alg = 8 [default = kBackPropagation];
-  required NetProto neuralnet = 9;
-
-  // total num of steps for validation
-  optional int32 validation_steps = 30 [default = 0];
-  // total num of steps for test
-  optional int32 test_steps = 31 [default = 0];
-  // frequency of validation
-  optional int32 validation_frequency = 32;
-  // frequency of test
-  optional int32 test_frequency = 33 [default = 0];
-  // frequency of checkpoint
-  optional int32 checkpoint_frequency = 34 [default = 0];
-  // send parameters to servers after training for this num of steps
-  optional int32 warmup_steps = 35 [default = 0];
-  // checkpoint path
-  optional bool resume = 36 [default = false];
-
-  // start display after this num steps
-  optional int32 display_after = 60 [default = 0];
-  // start checkpoint after this num steps
-  optional int32 checkpoint_after = 61 [default = 0];
-  // start test after this num steps
-  optional int32 test_after = 62 [default = 0];
-  // start validation after this num steps
-  optional int32 validation_after = 63 [default = 0];
-  // last snapshot step
-  optional int32 step = 64 [default = 0];
-  // display debug info
-  optional bool debug = 65 [default = false];
-  // checkpoint files
-  repeated string checkpoint = 66;
-  // reset the version of params loaded from checkpoint file to step
-  optional bool reset_param_version = 67 [default = false];
-}
-
-message NetProto {
-  repeated LayerProto layer = 1;
-  // partitioning type for parallelism
-  optional int32 partition_dim = 2 [default = 0];
-}
-
-// weight matrix should be defined before bias vector
-message ParamProto {
-  enum InitMethod {
-    // fix the values of all parameters to a constant given in the value field
-    kConstant = 0;
-    // sample gaussian with std and mean
-    kGaussian = 1;
-    // uniform sampling between low and high
-    kUniform = 2;
-    // copy the content and history from a previous training run
-    kPretrained = 3;
-    // from Toronto Convnet, let a=1/sqrt(fan_in), w*=a after generating from
-    // Gaussian distribution
-    kGaussainSqrtFanIn = 4;
-    // from Toronto Convnet, rectified linear activation, let
-    // a=sqrt(3)/sqrt(fan_in), range is [-a, +a]; no need to set value=sqrt(3),
-    // the program will multiply it.
-    kUniformSqrtFanIn = 5;
-    // from Theano MLP tutorial, let a=sqrt(6/(fan_in+fan_out)). for tanh
-    // activation, range is [-a, +a], for sigmoid activation, range is
-    // [-4a, +4a]; put the scale factor in the value field.
-    // <a href="http://deeplearning.net/tutorial/mlp.html"> Theano MLP</a>
-    kUniformSqrtFanInOut = 6;
-  }
-  optional InitMethod init_method = 1 [default = kGaussian];
-  // constant init
-  optional float value = 5 [default = 1];
-  // for uniform sampling
-  optional float low = 6 [default = -1];
-  optional float high = 7 [default = 1];
-  // for gaussian sampling
-  optional float mean = 8 [default = 0];
-  optional float std = 9 [default = 1];
-  // multiplied on the global learning rate.
-  optional float learning_rate_multiplier = 15 [default = 1];
-  // multiplied on the global weight decay.
-  optional float weight_decay_multiplier = 16 [default = 1];
-  // partition dimension, -1 for no partition
-  optional int32 partition_dim = 30;
-  // usually, the program will infer the param shape
-  repeated int32 shape = 31;
-  // used for identifying the same params from different models and for displaying debug info
-  optional string name = 61 [default = ""];
-  // name of the owner param from which this param shares the values
-  optional string share_from = 62;
-  // used internally
-  optional int32 id = 63;
-  // parameter slice limit (Google Protobuf also has size limit)
-  optional int32 split_threshold = 64 [default = 5000000];
-  // used internally
-  optional int32 owner = 65 [default = -1];
-}
-
-enum PartitionType{
-  kDataPartition=0;
-  kLayerPartition=1;
-  kNone=2;
-}
-
-message LayerProto {
-  // the layer name used for identification
-  required string name = 1;
-  enum LayerType{
-    kBridgeSrc = 15;
-    kBridgeDst = 16;
-    kConvolution = 1;
-    kConcate = 2;
-    kShardData = 3;
-    kDropout = 4;
-    kInnerProduct = 5;
-    kLabel = 18;
-    kLMDBData = 17;
-    kLRN = 6;
-    kMnist = 7;
-    kPooling = 8;
-    kPrefetch = 19;
-    kReLU = 9;
-    kRGBImage = 10;
-    kSoftmaxLoss = 11;
-    kSlice = 12;
-    kSplit = 13;
-    kTanh = 14;
-  }
-  // source layer names
-  repeated string srclayers = 3;
-  // parameters, e.g., weight matrix or bias vector
-  repeated ParamProto param = 12;
-  // all layers are included in the net structure for the training phase by default.
-  // layers that are not used by the training phase, e.g., a data layer for
-  // loading test data, should be removed by setting the exclude field.
-  repeated Phase exclude = 15;
-  // the layer type from the enum above
-  required LayerType type = 20;
-  // configuration for convolution layer
-  optional ConvolutionProto convolution_conf = 30;
-  // configuration for concatenation layer
-  optional ConcateProto concate_conf = 31;
-  // configuration for dropout layer
-  optional DropoutProto dropout_conf = 33;
-  // configuration for inner product layer
-  optional InnerProductProto innerproduct_conf = 34;
-  // configuration for lmdb data layer
-  optional DataProto lmdbdata_conf = 35;
-  // configuration for local response normalization layer
-  optional LRNProto lrn_conf = 45;
-  // configuration for mnist parser layer
-  optional MnistProto mnist_conf= 36;
-  // configuration for pooling layer
-  optional PoolingProto pooling_conf = 37;
-  // configuration for prefetch layer
-  optional PrefetchProto prefetch_conf = 44;
-  // configuration for rectified linear unit layer
-  optional ReLUProto relu_conf = 38;
-  // configuration for rgb image parser layer
-  optional RGBImageProto rgbimage_conf = 39;
-  // configuration for data layer
-  optional DataProto sharddata_conf = 32;
-  // configuration for slice layer
-  optional SliceProto slice_conf = 41;
-  // configuration for softmax loss layer
-  optional SoftmaxLossProto softmaxloss_conf = 40;
-  // configuration for split layer
-  optional SplitProto split_conf = 42;
-  // configuration for tanh layer
-  optional TanhProto tanh_conf = 43;
-
-
-  // overrides the partition dimension for neural net
-  optional int32 partition_dim =59 [default = -1];
-  optional string datablob = 58 [default = "unknow"];
-
-  // names of parameters shared from other layers
-  repeated string share_param = 60;
-  optional int32 partition_id = 62 [default = 0];
-}
-
-message RGBImageProto {
-  // scale factor for each pixel
-  optional float scale = 1 [default = 1.0];
-  // size after cropping
-  optional int32 cropsize = 2 [default = 0];
-  // mirror the image
-  optional bool mirror = 3 [default = false];
-  // meanfile path
-  optional string meanfile = 4 [default = ""];
-}
-
-message PrefetchProto {
-  repeated LayerProto sublayers = 1;
-}
-
-message SplitProto {
-  optional int32 num_splits = 1 [default =1];
-}
-
-// scaled tanh: A*tanh(B*x)
-message TanhProto {
-  // A of A*tanh(B*x)
-  optional float outer_scale = 1 [default = 1.0];
-  // B of A*tanh(B*x)
-  optional float inner_scale = 2 [default = 1.0];
-}
-
-message SoftmaxLossProto {
-  // computing accuracy against topk results
-  optional int32 topk = 1 [default = 1];
-  // loss scale factor
-  optional float scale= 30 [default = 1];
-}
-
-message ConvolutionProto {
-  // The number of outputs for the layer
-  required int32 num_filters = 1;
-  // the kernel height/width
-  required int32 kernel= 2;
-
-  // The padding height/width
-  optional int32 pad = 30 [default = 0];
-  // the stride
-  optional int32 stride = 31 [default = 1];
-  // whether to have bias terms
-  optional bool bias_term = 32 [default = true];
-}
-
-message ConcateProto {
-  // on which dimension, starts from 0
-  required int32 concate_dim = 1;
-}
-
-message DataProto {
-  // path to the data file/folder, absolute or relative to the workspace
-  required string path = 2;
-  // batch size.
-  required int32 batchsize = 4;
-  // skip [0,random_skip] records
-  optional int32 random_skip = 30 [default = 0];
-}
-
-message MnistProto {
-  // normalization x/norm_a
-  required float norm_a = 1 [default = 1];
-  // normalization x-norm_b
-  required float norm_b = 2 [default = 0];
-
-  // elastic distortion
-  optional int32 kernel = 30 [default = 0];
-  optional float sigma = 31 [default = 0];
-  optional float alpha = 32 [default = 0];
-  // rotation or horizontal shearing
-  optional float beta = 33 [default = 0];
-  // scaling
-  optional float gamma = 34 [default = 0];
-  // scale to this size as input for deformation
-  optional int32 resize = 35 [default = 0] ;
-  optional int32 elastic_freq = 36 [default = 0];
-}
-
-// Message that stores parameters used by DropoutLayer
-message DropoutProto {
-  // dropout ratio
-  optional float dropout_ratio = 30 [default = 0.5];
-}
-
-// Message that stores parameters used by InnerProductLayer
-message InnerProductProto {
-  // number of outputs for the layer
-  required int32 num_output = 1;
-  // use bias vector or not
-  optional bool bias_term = 30 [default = true];
-}
-
-message LRNProto {
-  // local response size
-  required int32 local_size = 1 [default = 5];
-  // scale factor
-  optional float alpha = 31 [default = 1.0];
-  // exponential number
-  optional float beta = 32 [default = 0.75];
-  enum NormRegion {
-    // across channels, e.g., r,g,b
-    ACROSS_CHANNELS = 0;
-    // within channel, e.g., r, g and b are concatenated into one channel
-    WITHIN_CHANNEL = 1;
-  }
-  // normalization objective
-  optional NormRegion norm_region = 33 [default = ACROSS_CHANNELS];
-  // offset
-  optional float knorm =34 [default = 1.0];
-}
-
-message PoolingProto {
-  // The kernel size (square)
-  required int32 kernel= 1;
-  enum PoolMethod {
-    MAX = 0;
-    AVE = 1;
-  }
-  // The pooling method
-  optional PoolMethod pool = 30 [default = MAX];
-  // The padding size
-  optional uint32 pad = 31 [default = 0];
-  // The stride
-  optional uint32 stride = 32 [default = 1];
-}
-
-message SliceProto{
-  required int32 slice_dim = 1;
-}
-
-message ReLUProto {
-  // Ref. Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013).
-  // Rectifier nonlinearities improve neural network acoustic models.
-  // In ICML Workshop on Deep Learning for Audio, Speech, and Language Processing.
-  optional float negative_slope = 1 [default = 0];
-}
-
-message UpdaterProto {
-  enum UpdaterType{
-    // normal SGD with momentum and weight decay
-    kSGD = 1;
-    // adaptive subgradient, http://www.magicbroom.info/Papers/DuchiHaSi10.pdf
-    kAdaGrad = 2;
-    // http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
-    kRMSProp = 3;
-    // Nesterov first optimal gradient method
-    kNesterov = 4;
-  }
-  // updater type
-  required UpdaterType type = 1 [default=kSGD];
-  // configuration for RMSProp algorithm
-  optional RMSPropProto rmsprop_conf = 50;
-
-  enum ChangeMethod {
-    kFixed = 0;
-    kInverseT = 1;
-    kInverse = 2;
-    kExponential = 3;
-    kLinear = 4;
-    kStep = 5;
-    kFixedStep = 6;
-  }
-  // change method for learning rate
-  required ChangeMethod lr_change= 2 [default = kFixed];
-
-  optional FixedStepProto fixedstep_conf=40;
-  optional StepProto step_conf=41;
-  optional LinearProto linear_conf=42;
-  optional ExponentialProto exponential_conf=43;
-  optional InverseProto inverse_conf=44;
-  optional InverseTProto inverset_conf=45;
-
-  optional float momentum = 31 [default = 0];
-  optional float weight_decay = 32 [default = 0];
-  // base learning rate
-  optional float base_lr = 34 [default = 0];
-  // used to avoid divide by 0, i.e. x/(y+delta)
-  optional float delta = 35 [default = 0.00000001];
-}
-
-message RMSPropProto{
-  // history=history*rho_+(1-rho_)*(grad*grad_scale);
-  required float rho = 1;
-}
-
-message FixedStepProto{
-  repeated int32 step = 28;
-  // lr = step_lr[i] if current step >= step[i]
-  repeated float step_lr = 29;
-}
-
-message StepProto{
-  // lr = base_lr * gamma^(step/change_freq)
-  required float gamma = 35 [default = 1];
-  // lr = base_lr * gamma^(step/change_freq)
-  required int32 change_freq= 40;
-}
-message LinearProto{
-  // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
-  required int32 change_freq= 40;
-  // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
-  required float final_lr = 39;
-}
-message ExponentialProto{
-  // lr = base_lr / 2^(step/change_freq)
-  required int32 change_freq= 40;
-}
-message InverseTProto{
-  // lr = base_lr / (1+step/final_lr)
-  required float final_lr = 39;
-}
-message InverseProto{
-  // lr = base_lr*(1+gamma*step)^(-pow)
-  required float gamma = 1 [default = 1];
-  // lr = base_lr*(1+gamma*step)^(-pow)
-  required float pow = 2 [default = 0];
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/proto/singa.proto
----------------------------------------------------------------------
diff --git a/src/proto/singa.proto b/src/proto/singa.proto
new file mode 100644
index 0000000..94af58d
--- /dev/null
+++ b/src/proto/singa.proto
@@ -0,0 +1,8 @@
+package singa;
+
+message SingaProto {
+  // ip/hostname:port[,ip/hostname:port]
+  required string zookeeper_host = 1;
+  // if not set, use the default dir of glog
+  optional string log_dir = 2;
+}
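
As a standalone illustration (the file path is hypothetical), the global
configuration can be loaded with the ReadProtoFromTextFile helper that the
tool.cc diff below also uses:

  #include "proto/singa.pb.h"
  #include "utils/common.h"

  void LoadGlobalConf() {
    singa::SingaProto global;
    // zookeeper_host has the form "ip/hostname:port[,ip/hostname:port]"
    singa::ReadProtoFromTextFile("conf/singa.conf", &global);
    // if log_dir is not set, glog's default dir is used
    singa::SetupLog(global.log_dir(), "example");
  }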

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/test/test_cluster.cc
----------------------------------------------------------------------
diff --git a/src/test/test_cluster.cc b/src/test/test_cluster.cc
index c34dd0f..a51126d 100644
--- a/src/test/test_cluster.cc
+++ b/src/test/test_cluster.cc
@@ -1,5 +1,4 @@
 #include "gtest/gtest.h"
-#include "proto/cluster.pb.h"
 #include "utils/cluster.h"
 
 using namespace singa;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index 78ec49f..4a0a47a 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -11,6 +11,7 @@
 #include "trainer/trainer.h"
 #include "mshadow/tensor.h"
 
+
 namespace singa {
 using std::vector;
 using std::map;
@@ -193,7 +194,7 @@ vector<Worker*> Trainer::CreateWorkers(int nthreads, const ModelProto& mconf){
   return workers;
 }
 
-void Trainer::Resume(ModelProto& mconf) {
+void Trainer::Resume(ModelProto* modelConf) {
   tinydir_dir dir;
   string folder = Cluster::Get()->checkpoint_folder();
   tinydir_open(&dir, folder.c_str());
@@ -223,34 +224,34 @@ void Trainer::Resume(ModelProto& mconf) {
   }
 
   if (latest_step > 0) {
-    mconf.set_step(latest_step);
+    modelConf->set_step(latest_step);
     for (auto ck_file : ck_files)
-      mconf.add_checkpoint(folder + "/" +string(ck_file));
+      modelConf->add_checkpoint(folder + "/" +string(ck_file));
   }
   tinydir_close(&dir);
 }
 
-void Trainer::Start(ModelProto& mconf, const GlobalProto& gconf,
-                    const ClusterProto& cconf, int job, bool resume){
+void Trainer::Start(int job, bool resume,
+    const JobProto& jobConf, const SingaProto& singaConf) {
   // register job to zookeeper at the beginning
-  auto cluster=Cluster::Get(gconf, cconf, job);
-
-  RegisterDefaultClasses(mconf);
+  auto cluster = Cluster::Get(job, singaConf, jobConf.cluster());
+  ModelProto model = jobConf.model();
+  RegisterDefaultClasses(model);
   if (resume)
-    Resume(mconf);
+    Resume(&model);
 
   router_ = new Router();
   router_->Bind(kInprocRouterEndpoint);
   const string hostip = cluster->hostip();
   int port = router_->Bind("tcp://" + hostip + ":*");
   // register endpoint to zookeeper
-  cluster->Register(hostip + ":" + std::to_string(port), getpid());
+  cluster->Register(getpid(), hostip + ":" + std::to_string(port));
 
   int nthreads = 1;
-  const vector<Worker*> workers = CreateWorkers(nthreads, mconf);
+  const vector<Worker*> workers = CreateWorkers(nthreads, model);
   nthreads += workers.size();
-  const vector<Server*> servers = CreateServers(nthreads, mconf);
-  SetupWorkerServer(mconf, workers, servers);
+  const vector<Server*> servers = CreateServers(nthreads, model);
+  SetupWorkerServer(model, workers, servers);
 
 #ifdef USE_MPI
   for (int i = 0; i < nthreads; i++)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/trainer/worker.cc
----------------------------------------------------------------------
diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc
index 7d779ad..87d251d 100644
--- a/src/trainer/worker.cc
+++ b/src/trainer/worker.cc
@@ -6,7 +6,6 @@
 #include "utils/cluster.h"
 #include "utils/factory.h"
 #include "trainer/worker.h"
-#include "proto/model.pb.h"
 
 namespace singa {
 using std::thread;
@@ -173,6 +172,9 @@ void Worker::Run() {
     step_++;
   }
 
+  // save the model
+  Checkpoint(step_, train_net_);
+
   // clean up
   if(updater_ == nullptr) {
     int svr_grp = grp_id_ / cluster->nworker_groups_per_server_group();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/utils/cluster.cc
----------------------------------------------------------------------
diff --git a/src/utils/cluster.cc b/src/utils/cluster.cc
index 791332d..6dad2a8 100644
--- a/src/utils/cluster.cc
+++ b/src/utils/cluster.cc
@@ -3,18 +3,17 @@
 #include <unistd.h>
 #include <fstream>
 #include "utils/cluster.h"
-#include "proto/cluster.pb.h"
 #include "proto/common.pb.h"
 #include <sys/stat.h>
 #include <sys/types.h>
 namespace singa {
 
 std::shared_ptr<Cluster> Cluster::instance_;
-Cluster::Cluster(const GlobalProto & global, const ClusterProto &cluster,
-                 int job_id) {
-  cluster_ = cluster;
-  global_ = global;
-  SetupFolders(cluster);
+Cluster::Cluster(
+    int job, const SingaProto& singaConf, const ClusterProto& clusterConf) {
+  cluster_ = clusterConf;
+  singa_ = singaConf;
+  SetupFolders(clusterConf);
   if(server_worker_separate())
     nprocs_=nworker_procs()+nserver_procs();
   else
@@ -38,14 +37,14 @@ Cluster::Cluster(const GlobalProto & global, const ClusterProto &cluster,
     }
   }
 
-  auto rt = new ZKClusterRT(global_.zookeeper_host(), job_id);
+  auto rt = new ZKClusterRT(singa_.zookeeper_host(), job);
   rt->Init();
   cluster_rt_=shared_ptr<ClusterRuntime>(static_cast<ClusterRuntime*>(rt));
 
   hostip_=GetHostIP();
 }
 
-void Cluster::Register(const string& endpoint, int pid) {
+void Cluster::Register(int pid, const string& endpoint) {
   procs_id_=cluster_rt_->RegistProc(endpoint, pid);
   CHECK_GE(procs_id_,0);
   CHECK_LT(procs_id_,nprocs());
@@ -69,9 +68,9 @@ void Cluster::SetupFolders(const ClusterProto &cluster){
   mkdir(checkpoint_folder().c_str(),  S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 }
 
-shared_ptr<Cluster> Cluster::Get(const GlobalProto& global,
-                                 const ClusterProto& cluster, int job_id){
-  instance_.reset(new Cluster(global, cluster, job_id));
+shared_ptr<Cluster> Cluster::Get(
+    int job, const SingaProto& singaConf, const ClusterProto& clusterConf) {
+  instance_.reset(new Cluster(job, singaConf, clusterConf));
   return instance_;
 }
 
@@ -82,7 +81,7 @@ shared_ptr<Cluster> Cluster::Get() {
   }
   return instance_;
 }
-int Cluster::Hash(int gid, int id, int flag){
+int Cluster::Hash(int gid, int id, int flag) {
   int ret=-1;
   if(flag==kServer){
     ret=(flag*cluster_.nserver_groups()+gid)*cluster_.nservers_per_group() + id;
@@ -91,7 +90,7 @@ int Cluster::Hash(int gid, int id, int flag){
   }
   return ret;
 }
-int Cluster::ProcsIDOf(int group_id, int id, int flag){
+int Cluster::ProcsIDOf(int group_id, int id, int flag) {
   return procs_ids_.at(Hash(group_id, id, flag));
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/utils/param.cc
----------------------------------------------------------------------
diff --git a/src/utils/param.cc b/src/utils/param.cc
index 69e3b09..5541acc 100644
--- a/src/utils/param.cc
+++ b/src/utils/param.cc
@@ -3,7 +3,7 @@
 #include <chrono>
 #include <random>
 #include "utils/param.h"
-#include "proto/cluster.pb.h"
+#include "proto/job.pb.h"
 #include "mshadow/tensor.h"
 #include "utils/singleton.h"
 using namespace mshadow;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/utils/tool.cc
----------------------------------------------------------------------
diff --git a/src/utils/tool.cc b/src/utils/tool.cc
index 267d266..3ffd0e8 100644
--- a/src/utils/tool.cc
+++ b/src/utils/tool.cc
@@ -2,8 +2,8 @@
 #include <glog/logging.h>
 #include <iostream>
 #include <fstream>
-#include "proto/cluster.pb.h"
 #include "utils/cluster_rt.h"
+#include "proto/singa.pb.h"
 #include "utils/common.h"
 #ifndef GFLAGS_GFLAGS_H_
   namespace gflags = google;
@@ -15,7 +15,7 @@ int main(int argc, char **argv) {
   google::InitGoogleLogging(argv[0]);
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
-  singa::GlobalProto global;
+  singa::SingaProto global;
   singa::ReadProtoFromTextFile(FLAGS_global.c_str(), &global);
   singa::SetupLog(global.log_dir(), "SingaTool");
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index 8e949ef..18e53ce 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -2,7 +2,7 @@
 #include "utils/updater.h"
 #include "mshadow/tensor.h"
 #include "mshadow/cxxnet_op.h"
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 using namespace mshadow;
 using namespace mshadow::expr;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/tool/gen_hosts.py
----------------------------------------------------------------------
diff --git a/tool/gen_hosts.py b/tool/gen_hosts.py
index a3bec47..e38c8bf 100644
--- a/tool/gen_hosts.py
+++ b/tool/gen_hosts.py
@@ -4,19 +4,20 @@ import argparse
 import os
 import sys
 from google.protobuf import text_format
-from pb2.cluster_pb2 import ClusterProto
+from pb2.job_pb2 import JobProto
 
 # parse command line
 parser = argparse.ArgumentParser(description='Generate host list from host file for a SINGA job')
-parser.add_argument('-conf', dest='conf', metavar='CONF_FILE', required=True, help='cluster.conf file')
+parser.add_argument('-conf', dest='conf', metavar='CONF_FILE', required=True, help='job.conf file')
 parser.add_argument('-hosts', dest='hosts', metavar='HOST_FILE', required=True, help='global host file')
 parser.add_argument('-output', dest='output', metavar='OUTPUT_FILE', required=True, help='generated list')
 args = parser.parse_args();
 
 # read from .conf file
 fd_conf = open(args.conf, 'r')
-cluster = ClusterProto()
-text_format.Merge(str(fd_conf.read()), cluster)
+job = JobProto()
+text_format.Merge(str(fd_conf.read()), job)
+cluster = job.cluster
 nworker_procs = cluster.nworker_groups * cluster.nworkers_per_group / cluster.nworkers_per_procs
 nserver_procs = cluster.nserver_groups * cluster.nservers_per_group / cluster.nservers_per_procs
 nprocs = 0
@@ -39,10 +40,10 @@ fd_hosts.close()
 # write to output file
 num_hosts = len(hosts)
 if (num_hosts == 0):
-  print "contains no valid host %s" % args.hosts
+  print "Contains no valid host %s" % args.hosts
   sys.exit(1)
 fd_output = open(args.output, 'w')
 for i in range(nprocs):
   fd_output.write(hosts[i % num_hosts] + '\n')
 fd_output.close()
-print 'generate host list at %s' % args.output
+print 'Generate host list to %s' % args.output
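
The arithmetic above maps the ClusterProto topology to a process count. The
same computation in C++ (illustrative only; the separate-process branch
follows Cluster::Cluster in the cluster.cc diff above, and std::max is an
assumption for the shared case):

  #include <algorithm>
  #include "proto/job.pb.h"

  int NumProcs(const singa::ClusterProto& c) {
    // e.g., 2 groups x 4 workers/group with 2 workers/process -> 4 processes
    int nworker_procs =
        c.nworker_groups() * c.nworkers_per_group() / c.nworkers_per_procs();
    int nserver_procs =
        c.nserver_groups() * c.nservers_per_group() / c.nservers_per_procs();
    if (c.server_worker_separate())  // workers and servers in different procs
      return nworker_procs + nserver_procs;
    return std::max(nworker_procs, nserver_procs);  // assumption: procs shared
  }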



[2/3] incubator-singa git commit: SINGA-36 Clean ModelProto, ClusterProto, JobProto and driver program

Posted by wa...@apache.org.
SINGA-36 Clean ModelProto, ClusterProto, JobProto and driver program

The ClusterProto and ModelProto are merged into JobProto.
A SingaProto is added for the global singa configuration, including the zookeeper info and the log dir setting.

The driver program (i.e., main.cc) is cleaned up.
1. Users include singa.h and their own headers in the driver program.
2. They prepare the job configuration and the singa configuration (this may be
made transparent to users, e.g., through an environment variable such as SINGA_HOME).
3. They register the layers they implemented.
4. They call the trainer to start the training.
After linking the driver program with libsinga, users pass the executable to bin/singa-run.sh
together with the workspace where job.conf is stored to submit the job; a sketch of such a driver program is given below.
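
A minimal sketch of such a driver program, using only the signatures visible
in this diff (Trainer::Start from trainer.cc and ReadProtoFromTextFile from
tool.cc); the configuration paths and the layer-registration step are
placeholders:

  // hypothetical driver sketch, not the actual main.cc of this commit
  #include "singa.h"

  int main(int argc, char** argv) {
    singa::JobProto jobConf;
    singa::SingaProto singaConf;
    singa::ReadProtoFromTextFile("workspace/job.conf", &jobConf);  // placeholder
    singa::ReadProtoFromTextFile("conf/singa.conf", &singaConf);   // placeholder
    // step 3: register user-implemented layers with the factory here
    singa::Trainer trainer;
    trainer.Start(/*job=*/0, /*resume=*/false, jobConf, singaConf);
    return 0;
  }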

Some layers are moved into optional_layer.cc(.h) if they have dependencies on external libs (e.g., lmdb and opencv).
If users do not need these layers, they do not need to install the dependencies. TODO: update the configure script to add
these options. USE_OPTIONAL_LAYER controls the compilation of optional_layer.cc(.h).
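
For instance, the guard could look like the following sketch (the actual
guards in optional_layer.cc(.h) may differ):

  #ifdef USE_OPTIONAL_LAYER
  #include "neuralnet/optional_layer.h"  // layers that need lmdb/opencv
  #endif

  void RegisterLayers() {
  #ifdef USE_OPTIONAL_LAYER
    // register kLMDBData and the image layers that depend on external libs
  #endif
    // register the core layers, which have no external dependencies
  }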

Move the command line argument singa_conf into trainer.cc.
Usually, users do not need to pass this argument explicitly.
The singa-run.sh script will find it automatically.

Modify the scripts to support the new singa command line.

Minor changes to the example job config.

Add a SubmitJob function in singa.h to simplify the job submission API.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/0478e8cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/0478e8cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/0478e8cf

Branch: refs/heads/master
Commit: 0478e8cfdb54688c7c627cd328af04aa6aa30a63
Parents: 29de863
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Jul 22 15:51:08 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Jul 23 11:52:12 2015 +0800

----------------------------------------------------------------------
 .gitignore                         |   1 +
 Makefile.am                        |  48 +---
 Makefile.example                   |   5 +-
 Makefile.in                        | 107 ++++----
 bin/singa-console.sh               |   6 +-
 bin/singa-run.sh                   |  57 ++--
 bin/singa-stop.sh                  |  14 +-
 conf/singa.conf                    |   2 +-
 examples/cifar10/Makefile.example  |   6 +-
 examples/cifar10/cluster.conf      |   7 -
 examples/cifar10/create_shard.cc   |   5 +-
 examples/cifar10/job.conf          | 228 ++++++++++++++++
 examples/cifar10/model.conf        | 222 ---------------
 examples/mnist/Makefile.example    |   2 +-
 examples/mnist/batch.sh            |  41 ---
 examples/mnist/cluster.conf        |   7 -
 examples/mnist/conv.conf           |  11 +
 examples/mnist/job.conf            | 230 ++++++++++++++++
 examples/mnist/model.conf          | 224 ----------------
 examples/mnist/run.sh              |  40 ---
 include/neuralnet/base_layer.h     |   2 +-
 include/neuralnet/layer.h          |  18 +-
 include/neuralnet/neuralnet.h      |   2 +-
 include/neuralnet/optional_layer.h |  28 ++
 include/singa.h                    |  32 +++
 include/trainer/server.h           |   2 +-
 include/trainer/trainer.h          |  35 ++-
 include/trainer/worker.h           |   2 +-
 include/utils/cluster.h            |  27 +-
 include/utils/common.h             |   6 +
 include/utils/param.h              |   2 +-
 include/utils/updater.h            |   2 +-
 src/main.cc                        |  50 ++--
 src/neuralnet/layer.cc             | 163 ++---------
 src/neuralnet/neuralnet.cc         |   5 +-
 src/neuralnet/optional_layer.cc    | 104 +++++++
 src/proto/cluster.proto            |  61 -----
 src/proto/job.proto                | 461 ++++++++++++++++++++++++++++++++
 src/proto/model.proto              | 427 -----------------------------
 src/proto/singa.proto              |   8 +
 src/test/test_cluster.cc           |   1 -
 src/trainer/trainer.cc             |  27 +-
 src/trainer/worker.cc              |   4 +-
 src/utils/cluster.cc               |  25 +-
 src/utils/param.cc                 |   2 +-
 src/utils/tool.cc                  |   4 +-
 src/utils/updater.cc               |   2 +-
 tool/gen_hosts.py                  |  13 +-
 48 files changed, 1348 insertions(+), 1430 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 9abefe5..a88e846 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@
 *.pb.cc
 *.hosts
 *.id
+*.pids
 *.tmp
 *.out
 tool/pb2/*

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index cecc5ec..56facbb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,20 +10,17 @@ AC_CXXFLAGS = -O3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
 
 INCLUDES = -I$(top_srcdir)/include
 
-PROTOS := $(top_srcdir)/src/proto/cluster.proto \
-          $(top_srcdir)/src/proto/model.proto \
+PROTOS := $(top_srcdir)/src/proto/singa.proto \
+          $(top_srcdir)/src/proto/job.proto \
           $(top_srcdir)/src/proto/common.proto
-PROTO_SRCS := src/proto/cluster.pb.cc \
-              src/proto/model.pb.cc \
+PROTO_SRCS := src/proto/singa.pb.cc \
+              src/proto/job.pb.cc \
               src/proto/common.pb.cc
-PROTO_HDRS := src/proto/cluster.pb.h \
-              src/proto/model.pb.h \
-              src/proto/common.pb.h
-PROTO_OBJS := src/proto/cluster.pb.o \
-              src/proto/model.pb.o \
-              src/proto/common.pb.o
-PROTO_PYS := tool/pb2/cluster_pb2.py \
-             tool/pb2/model_pb2.py \
+PROTO_HDRS := include/proto/singa.pb.h \
+              include/proto/job.pb.h \
+              include/proto/common.pb.h
+PROTO_PYS := tool/pb2/singa_pb2.py \
+             tool/pb2/job_pb2.py \
              tool/pb2/common_pb2.py
 
 SINGA_SRCS := src/utils/cluster.cc \
@@ -39,11 +36,13 @@ SINGA_SRCS := src/utils/cluster.cc \
               src/trainer/trainer.cc \
               src/neuralnet/base_layer.cc \
               src/neuralnet/neuralnet.cc \
+              src/neuralnet/optional_layer.cc \
               src/neuralnet/layer.cc \
               src/communication/socket.cc \
               src/communication/msg.cc
 
-SINGA_HDRS := include/utils/cluster.h \
+SINGA_HDRS := include/singa.h \
+              include/utils/cluster.h \
               include/utils/cluster_rt.h \
               include/utils/param.h \
               include/utils/common.h \
@@ -59,6 +58,7 @@ SINGA_HDRS := include/utils/cluster.h \
               include/trainer/trainer.h \
               include/neuralnet/base_layer.h \
               include/neuralnet/layer.h \
+              include/neuralnet/optional_layer.h \
               include/neuralnet/neuralnet.h \
               include/mshadow/tensor_expr.h \
               include/mshadow/tensor_container.h \
@@ -71,29 +71,9 @@ SINGA_HDRS := include/utils/cluster.h \
               include/communication/msg.h \
               include/communication/socket.h
 
-SINGA_OBJS := src/proto/cluster.pb.o \
-              src/proto/model.pb.o \
-              src/utils/cluster.o \
-              src/utils/cluster_rt.o \
-              src/utils/graph.o \
-              src/utils/common.o \
-              src/utils/param.o \
-              src/utils/updater.o \
-              src/utils/data_shard.o \
-              src/utils/blob.o \
-              src/trainer/server.o \
-              src/trainer/worker.o \
-              src/trainer/pm_server.o \
-              src/trainer/trainer.o \
-              src/trainer/pm_worker.o \
-              src/neuralnet/base_layer.o \
-              src/neuralnet/neuralnet.o \
-              src/neuralnet/layer.o \
-              src/communication/socket.o \
-              src/communication/msg.o
 
 lib_LTLIBRARIES = libsinga.la
-libsinga_la_SOURCES = $(PROTO_HDRS) $(PROTO_SRCS) $(SINGA_SRCS) $(SINGA_HDRS)
+libsinga_la_SOURCES = $(PROTO_HDRS) $(PROTO_SRCS) $(SINGA_HDRS) $(SINGA_SRCS)
 libsinga_la_CXXFLAGS = -O3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
                        $(MSHADOW_FLAGS) -DCPU_ONLY=1 -funroll-loops -gdwarf-2 \
                        -gstrict-dwarf -Woverloaded-virtual -DTHREADED -fpermissive

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/Makefile.example
----------------------------------------------------------------------
diff --git a/Makefile.example b/Makefile.example
index 2195b91..79201cb 100644
--- a/Makefile.example
+++ b/Makefile.example
@@ -4,7 +4,7 @@ HOME_DIR := /usr
 # Lib folder for system and external libs. You may need to change it.
 LIBRARY_DIRS := $(HOME_DIR)/lib64 $(HOME_DIR)/lib $(HOME_DIR)/local/lib
 # Header folder for system and external libs. You may need to change it.
-INCLUDE_DIRS := $(HOME_DIR)/include ./include $(HOME_DIR)/local/include/zookeeper 
+INCLUDE_DIRS := $(HOME_DIR)/include ./include $(HOME_DIR)/local/include/zookeeper
 # g++ location, should support c++11, tested with 4.8.1
 CXX := g++
 
@@ -15,7 +15,7 @@ LIBRARIES := glog gflags protobuf rt opencv_highgui opencv_imgproc opencv_core\
 LDFLAGS := $(foreach librarydir, $(LIBRARY_DIRS), -L$(librarydir))\
 	$(foreach library, $(LIBRARIES), -l$(library))
 # Folder to store compiled files
-BUILD_DIR := build
+BUILD_DIR := .libs
 MSHADOW_FLAGS :=-DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0
 ZK_FLAGS :=-DTHREADED -fpermissive
 CXXFLAGS := -O3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
@@ -51,6 +51,7 @@ OBJS := $(sort $(SINGA_OBJS) $(TEST_OBJS) )
 .PHONY: singa test
 
 singa: $(PROTO_OBJS) $(SINGA_OBJS)
+	$(CXX) -shared -o $(BUILD_DIR)/libsinga.so $(SINGA_OBJS)
 	$(CXX) $(SINGA_OBJS) src/main.cc -o singa $(CXXFLAGS) $(LDFLAGS)
 	@echo
 	$(CXX) $(SINGA_OBJS) src/utils/tool.cc -o singatool $(CXXFLAGS) $(LDFLAGS)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/Makefile.in
----------------------------------------------------------------------
diff --git a/Makefile.in b/Makefile.in
index 34ddd0c..c7fd578 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -82,8 +82,8 @@ LTLIBRARIES = $(lib_LTLIBRARIES)
 libsinga_la_LIBADD =
 am__objects_1 =
 am__dirstamp = $(am__leading_dot)dirstamp
-am__objects_2 = src/proto/libsinga_la-cluster.pb.lo \
-	src/proto/libsinga_la-model.pb.lo \
+am__objects_2 = src/proto/libsinga_la-singa.pb.lo \
+	src/proto/libsinga_la-job.pb.lo \
 	src/proto/libsinga_la-common.pb.lo
 am__objects_3 = src/utils/libsinga_la-cluster.lo \
 	src/utils/libsinga_la-cluster_rt.lo \
@@ -97,11 +97,12 @@ am__objects_3 = src/utils/libsinga_la-cluster.lo \
 	src/trainer/libsinga_la-trainer.lo \
 	src/neuralnet/libsinga_la-base_layer.lo \
 	src/neuralnet/libsinga_la-neuralnet.lo \
+	src/neuralnet/libsinga_la-optional_layer.lo \
 	src/neuralnet/libsinga_la-layer.lo \
 	src/communication/libsinga_la-socket.lo \
 	src/communication/libsinga_la-msg.lo
 am_libsinga_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
-	$(am__objects_3) $(am__objects_1)
+	$(am__objects_1) $(am__objects_3)
 libsinga_la_OBJECTS = $(am_libsinga_la_OBJECTS)
 AM_V_lt = $(am__v_lt_$(V))
 am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
@@ -304,24 +305,20 @@ AC_CXXFLAGS = -O3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
               $(MSHADOW_FLAGS) -DCPU_ONLY=1 -funroll-loops
 
 INCLUDES = -I$(top_srcdir)/include
-PROTOS := $(top_srcdir)/src/proto/cluster.proto \
-          $(top_srcdir)/src/proto/model.proto \
+PROTOS := $(top_srcdir)/src/proto/singa.proto \
+          $(top_srcdir)/src/proto/job.proto \
           $(top_srcdir)/src/proto/common.proto
 
-PROTO_SRCS := src/proto/cluster.pb.cc \
-              src/proto/model.pb.cc \
+PROTO_SRCS := src/proto/singa.pb.cc \
+              src/proto/job.pb.cc \
               src/proto/common.pb.cc
 
-PROTO_HDRS := src/proto/cluster.pb.h \
-              src/proto/model.pb.h \
-              src/proto/common.pb.h
+PROTO_HDRS := include/proto/singa.pb.h \
+              include/proto/job.pb.h \
+              include/proto/common.pb.h
 
-PROTO_OBJS := src/proto/cluster.pb.o \
-              src/proto/model.pb.o \
-              src/proto/common.pb.o
-
-PROTO_PYS := tool/pb2/cluster_pb2.py \
-             tool/pb2/model_pb2.py \
+PROTO_PYS := tool/pb2/singa_pb2.py \
+             tool/pb2/job_pb2.py \
              tool/pb2/common_pb2.py
 
 SINGA_SRCS := src/utils/cluster.cc \
@@ -337,11 +334,13 @@ SINGA_SRCS := src/utils/cluster.cc \
               src/trainer/trainer.cc \
               src/neuralnet/base_layer.cc \
               src/neuralnet/neuralnet.cc \
+              src/neuralnet/optional_layer.cc \
               src/neuralnet/layer.cc \
               src/communication/socket.cc \
               src/communication/msg.cc
 
-SINGA_HDRS := include/utils/cluster.h \
+SINGA_HDRS := include/singa.h \
+              include/utils/cluster.h \
               include/utils/cluster_rt.h \
               include/utils/param.h \
               include/utils/common.h \
@@ -357,6 +356,7 @@ SINGA_HDRS := include/utils/cluster.h \
               include/trainer/trainer.h \
               include/neuralnet/base_layer.h \
               include/neuralnet/layer.h \
+              include/neuralnet/optional_layer.h \
               include/neuralnet/neuralnet.h \
               include/mshadow/tensor_expr.h \
               include/mshadow/tensor_container.h \
@@ -369,29 +369,8 @@ SINGA_HDRS := include/utils/cluster.h \
               include/communication/msg.h \
               include/communication/socket.h
 
-SINGA_OBJS := src/proto/cluster.pb.o \
-              src/proto/model.pb.o \
-              src/utils/cluster.o \
-              src/utils/cluster_rt.o \
-              src/utils/graph.o \
-              src/utils/common.o \
-              src/utils/param.o \
-              src/utils/updater.o \
-              src/utils/data_shard.o \
-              src/utils/blob.o \
-              src/trainer/server.o \
-              src/trainer/worker.o \
-              src/trainer/pm_server.o \
-              src/trainer/trainer.o \
-              src/trainer/pm_worker.o \
-              src/neuralnet/base_layer.o \
-              src/neuralnet/neuralnet.o \
-              src/neuralnet/layer.o \
-              src/communication/socket.o \
-              src/communication/msg.o
-
 lib_LTLIBRARIES = libsinga.la
-libsinga_la_SOURCES = $(PROTO_HDRS) $(PROTO_SRCS) $(SINGA_SRCS) $(SINGA_HDRS)
+libsinga_la_SOURCES = $(PROTO_HDRS) $(PROTO_SRCS) $(SINGA_HDRS) $(SINGA_SRCS)
 libsinga_la_CXXFLAGS = -O3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
                        $(MSHADOW_FLAGS) -DCPU_ONLY=1 -funroll-loops -gdwarf-2 \
                        -gstrict-dwarf -Woverloaded-virtual -DTHREADED -fpermissive
@@ -520,9 +499,9 @@ src/proto/$(am__dirstamp):
 src/proto/$(DEPDIR)/$(am__dirstamp):
 	@$(MKDIR_P) src/proto/$(DEPDIR)
 	@: > src/proto/$(DEPDIR)/$(am__dirstamp)
-src/proto/libsinga_la-cluster.pb.lo: src/proto/$(am__dirstamp) \
+src/proto/libsinga_la-singa.pb.lo: src/proto/$(am__dirstamp) \
 	src/proto/$(DEPDIR)/$(am__dirstamp)
-src/proto/libsinga_la-model.pb.lo: src/proto/$(am__dirstamp) \
+src/proto/libsinga_la-job.pb.lo: src/proto/$(am__dirstamp) \
 	src/proto/$(DEPDIR)/$(am__dirstamp)
 src/proto/libsinga_la-common.pb.lo: src/proto/$(am__dirstamp) \
 	src/proto/$(DEPDIR)/$(am__dirstamp)
@@ -571,6 +550,9 @@ src/neuralnet/libsinga_la-base_layer.lo:  \
 	src/neuralnet/$(DEPDIR)/$(am__dirstamp)
 src/neuralnet/libsinga_la-neuralnet.lo: src/neuralnet/$(am__dirstamp) \
 	src/neuralnet/$(DEPDIR)/$(am__dirstamp)
+src/neuralnet/libsinga_la-optional_layer.lo:  \
+	src/neuralnet/$(am__dirstamp) \
+	src/neuralnet/$(DEPDIR)/$(am__dirstamp)
 src/neuralnet/libsinga_la-layer.lo: src/neuralnet/$(am__dirstamp) \
 	src/neuralnet/$(DEPDIR)/$(am__dirstamp)
 src/communication/$(am__dirstamp):
@@ -659,12 +641,14 @@ mostlyclean-compile:
 	-rm -f src/neuralnet/libsinga_la-layer.lo
 	-rm -f src/neuralnet/libsinga_la-neuralnet.$(OBJEXT)
 	-rm -f src/neuralnet/libsinga_la-neuralnet.lo
-	-rm -f src/proto/libsinga_la-cluster.pb.$(OBJEXT)
-	-rm -f src/proto/libsinga_la-cluster.pb.lo
+	-rm -f src/neuralnet/libsinga_la-optional_layer.$(OBJEXT)
+	-rm -f src/neuralnet/libsinga_la-optional_layer.lo
 	-rm -f src/proto/libsinga_la-common.pb.$(OBJEXT)
 	-rm -f src/proto/libsinga_la-common.pb.lo
-	-rm -f src/proto/libsinga_la-model.pb.$(OBJEXT)
-	-rm -f src/proto/libsinga_la-model.pb.lo
+	-rm -f src/proto/libsinga_la-job.pb.$(OBJEXT)
+	-rm -f src/proto/libsinga_la-job.pb.lo
+	-rm -f src/proto/libsinga_la-singa.pb.$(OBJEXT)
+	-rm -f src/proto/libsinga_la-singa.pb.lo
 	-rm -f src/singa-main.$(OBJEXT)
 	-rm -f src/trainer/libsinga_la-server.$(OBJEXT)
 	-rm -f src/trainer/libsinga_la-server.lo
@@ -699,9 +683,10 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@src/neuralnet/$(DEPDIR)/libsinga_la-base_layer.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/neuralnet/$(DEPDIR)/libsinga_la-layer.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/neuralnet/$(DEPDIR)/libsinga_la-neuralnet.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@src/proto/$(DEPDIR)/libsinga_la-cluster.pb.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/neuralnet/$(DEPDIR)/libsinga_la-optional_layer.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/proto/$(DEPDIR)/libsinga_la-common.pb.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@src/proto/$(DEPDIR)/libsinga_la-model.pb.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/proto/$(DEPDIR)/libsinga_la-job.pb.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/proto/$(DEPDIR)/libsinga_la-singa.pb.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/trainer/$(DEPDIR)/libsinga_la-server.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/trainer/$(DEPDIR)/libsinga_la-trainer.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/trainer/$(DEPDIR)/libsinga_la-worker.Plo@am__quote@
@@ -742,21 +727,21 @@ distclean-compile:
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(LTCXXCOMPILE) -c -o $@ $<
 
-src/proto/libsinga_la-cluster.pb.lo: src/proto/cluster.pb.cc
-@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/proto/libsinga_la-cluster.pb.lo -MD -MP -MF src/proto/$(DEPDIR)/libsinga_la-cluster.pb.Tpo -c -o src/proto/libsinga_la-cluster.pb.lo `test -f 'src/proto/cluster.pb.cc' || echo '$(srcdir)/'`src/proto/cluster.pb.cc
-@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/proto/$(DEPDIR)/libsinga_la-cluster.pb.Tpo src/proto/$(DEPDIR)/libsinga_la-cluster.pb.Plo
+src/proto/libsinga_la-singa.pb.lo: src/proto/singa.pb.cc
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/proto/libsinga_la-singa.pb.lo -MD -MP -MF src/proto/$(DEPDIR)/libsinga_la-singa.pb.Tpo -c -o src/proto/libsinga_la-singa.pb.lo `test -f 'src/proto/singa.pb.cc' || echo '$(srcdir)/'`src/proto/singa.pb.cc
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/proto/$(DEPDIR)/libsinga_la-singa.pb.Tpo src/proto/$(DEPDIR)/libsinga_la-singa.pb.Plo
 @am__fastdepCXX_FALSE@	$(AM_V_CXX) @AM_BACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='src/proto/cluster.pb.cc' object='src/proto/libsinga_la-cluster.pb.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='src/proto/singa.pb.cc' object='src/proto/libsinga_la-singa.pb.lo' libtool=yes @AMDEPBACKSLASH@
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -c -o src/proto/libsinga_la-cluster.pb.lo `test -f 'src/proto/cluster.pb.cc' || echo '$(srcdir)/'`src/proto/cluster.pb.cc
+@am__fastdepCXX_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -c -o src/proto/libsinga_la-singa.pb.lo `test -f 'src/proto/singa.pb.cc' || echo '$(srcdir)/'`src/proto/singa.pb.cc
 
-src/proto/libsinga_la-model.pb.lo: src/proto/model.pb.cc
-@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/proto/libsinga_la-model.pb.lo -MD -MP -MF src/proto/$(DEPDIR)/libsinga_la-model.pb.Tpo -c -o src/proto/libsinga_la-model.pb.lo `test -f 'src/proto/model.pb.cc' || echo '$(srcdir)/'`src/proto/model.pb.cc
-@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/proto/$(DEPDIR)/libsinga_la-model.pb.Tpo src/proto/$(DEPDIR)/libsinga_la-model.pb.Plo
+src/proto/libsinga_la-job.pb.lo: src/proto/job.pb.cc
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/proto/libsinga_la-job.pb.lo -MD -MP -MF src/proto/$(DEPDIR)/libsinga_la-job.pb.Tpo -c -o src/proto/libsinga_la-job.pb.lo `test -f 'src/proto/job.pb.cc' || echo '$(srcdir)/'`src/proto/job.pb.cc
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/proto/$(DEPDIR)/libsinga_la-job.pb.Tpo src/proto/$(DEPDIR)/libsinga_la-job.pb.Plo
 @am__fastdepCXX_FALSE@	$(AM_V_CXX) @AM_BACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='src/proto/model.pb.cc' object='src/proto/libsinga_la-model.pb.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='src/proto/job.pb.cc' object='src/proto/libsinga_la-job.pb.lo' libtool=yes @AMDEPBACKSLASH@
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -c -o src/proto/libsinga_la-model.pb.lo `test -f 'src/proto/model.pb.cc' || echo '$(srcdir)/'`src/proto/model.pb.cc
+@am__fastdepCXX_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -c -o src/proto/libsinga_la-job.pb.lo `test -f 'src/proto/job.pb.cc' || echo '$(srcdir)/'`src/proto/job.pb.cc
 
 src/proto/libsinga_la-common.pb.lo: src/proto/common.pb.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/proto/libsinga_la-common.pb.lo -MD -MP -MF src/proto/$(DEPDIR)/libsinga_la-common.pb.Tpo -c -o src/proto/libsinga_la-common.pb.lo `test -f 'src/proto/common.pb.cc' || echo '$(srcdir)/'`src/proto/common.pb.cc
@@ -870,6 +855,14 @@ src/neuralnet/libsinga_la-neuralnet.lo: src/neuralnet/neuralnet.cc
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -c -o src/neuralnet/libsinga_la-neuralnet.lo `test -f 'src/neuralnet/neuralnet.cc' || echo '$(srcdir)/'`src/neuralnet/neuralnet.cc
 
+src/neuralnet/libsinga_la-optional_layer.lo: src/neuralnet/optional_layer.cc
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/neuralnet/libsinga_la-optional_layer.lo -MD -MP -MF src/neuralnet/$(DEPDIR)/libsinga_la-optional_layer.Tpo -c -o src/neuralnet/libsinga_la-optional_layer.lo `test -f 'src/neuralnet/optional_layer.cc' || echo '$(srcdir)/'`src/neuralnet/optional_layer.cc
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/neuralnet/$(DEPDIR)/libsinga_la-optional_layer.Tpo src/neuralnet/$(DEPDIR)/libsinga_la-optional_layer.Plo
+@am__fastdepCXX_FALSE@	$(AM_V_CXX) @AM_BACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='src/neuralnet/optional_layer.cc' object='src/neuralnet/libsinga_la-optional_layer.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -c -o src/neuralnet/libsinga_la-optional_layer.lo `test -f 'src/neuralnet/optional_layer.cc' || echo '$(srcdir)/'`src/neuralnet/optional_layer.cc
+
 src/neuralnet/libsinga_la-layer.lo: src/neuralnet/layer.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsinga_la_CXXFLAGS) $(CXXFLAGS) -MT src/neuralnet/libsinga_la-layer.lo -MD -MP -MF src/neuralnet/$(DEPDIR)/libsinga_la-layer.Tpo -c -o src/neuralnet/libsinga_la-layer.lo `test -f 'src/neuralnet/layer.cc' || echo '$(srcdir)/'`src/neuralnet/layer.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/neuralnet/$(DEPDIR)/libsinga_la-layer.Tpo src/neuralnet/$(DEPDIR)/libsinga_la-layer.Plo

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/bin/singa-console.sh
----------------------------------------------------------------------
diff --git a/bin/singa-console.sh b/bin/singa-console.sh
index 8844bca..9ed3075 100755
--- a/bin/singa-console.sh
+++ b/bin/singa-console.sh
@@ -24,9 +24,9 @@
 #
 
 usage="Usage:\n
-       singa-console.sh list         :  list running singa jobs\n
-       singa-console.sh view JOB_ID  :  view procs of a singa job\n
-       singa-console.sh kill JOB_ID  :  kill a singa job"
+       # singa-console.sh list         :  list running singa jobs\n
+       # singa-console.sh view JOB_ID  :  view procs of a singa job\n
+       # singa-console.sh kill JOB_ID  :  kill a singa job"
 
 if [ $# == 0 ]; then
   echo -e $usage

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/bin/singa-run.sh
----------------------------------------------------------------------
diff --git a/bin/singa-run.sh b/bin/singa-run.sh
index a478221..d434331 100755
--- a/bin/singa-run.sh
+++ b/bin/singa-run.sh
@@ -23,23 +23,38 @@
 # run a Singa job
 #
 
-usage="Usage: singa-run.sh -conf=CONF_DIR
-       (CONF_DIR should contain cluster.conf && model.conf)"
+usage="Usage: singa-run.sh -workspace=YOUR_WORKSPACE [ --resume ]\n
+       # workspace should contain job.conf\n
+       # set --resume if you want to recover a job\n
+       ### NOTICE ###\n
+       # if you are using model.conf + cluster.conf,\n
+       # please see how to combine them into a job.conf:\n
+       # http://singa.incubator.apache.org/quick-start.html"
 
 # check arguments
-if [ $# != 1 ] || [[ $1 != "-conf="* ]]; then
-  echo $usage
+while [ $# != 0 ]; do
+  if [[ $1 == "-workspace="* ]]; then
+    workspace=$1
+  elif [ $1 == "--resume" ]; then
+    resume=1
+  else
+    echo -e $usage
+    exit 1
+  fi
+  shift
+done
+if [ -z $workspace ]; then
+  echo -e $usage
   exit 1
 fi
 
 # get environment variables
 . `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
 # get workspace path
-workspace=`cd "${1:6}">/dev/null; pwd`
-cluster_conf=$workspace/cluster.conf
-model_conf=$workspace/model.conf
-if [ ! -f $cluster_conf ] || [ ! -f $model_conf ]; then
-  echo cluster.conf or model.conf not exists in $workspace
+workspace=`cd "${workspace:11}">/dev/null; pwd`
+job_conf=$workspace/job.conf
+if [ ! -f $job_conf ]; then
+  echo job.conf does not exist in $workspace
   exit 1
 fi
 cd $SINGA_HOME
@@ -51,7 +66,7 @@ fi
 
 # generate host file
 host_file=$workspace/job.hosts
-python $SINGA_HOME/tool/gen_hosts.py -conf=$cluster_conf \
+python $SINGA_HOME/tool/gen_hosts.py -conf=$job_conf \
                                      -hosts=$SINGA_CONF/hostfile \
                                      -output=$host_file \
                                      || exit 1
@@ -59,24 +74,32 @@ python $SINGA_HOME/tool/gen_hosts.py -conf=$cluster_conf \
 # generate unique job id
 ./singatool create 1>$workspace/job.id || exit 1
 job_id=`cat $workspace/job.id`
-echo generate job id at $workspace/job.id [job_id = $job_id]
+echo Generated job id at $workspace/job.id [job_id = $job_id]
+
+# set command to run singa
+singa_run="./singa -workspace=$workspace -job=$job_id"
+if [ ! -z $resume ]; then
+  singa_run="$singa_run --resume"
+fi
+singa_sshrun="cd $SINGA_HOME; $singa_run"
 
 # ssh and start singa processes
 ssh_options="-oStrictHostKeyChecking=no \
 -oUserKnownHostsFile=/dev/null \
 -oLogLevel=quiet"
 hosts=`cat $host_file | cut -d ' ' -f 1`
-singa_run="./singa -cluster=$cluster_conf -model=$model_conf \
-           -job=$job_id"
-singa_sshrun="cd $SINGA_HOME; $singa_run"
-
 for i in ${hosts[@]} ; do
   if [ $i = localhost ] ; then
-    echo executing : $singa_run
+    echo Executing : $singa_run
     $singa_run &
   else
-    echo executing @ $i : $singa_sshrun
+    echo Executing @ $i : $singa_sshrun
     ssh $ssh_options $i $singa_sshrun &
   fi
 done
+
+# generate pid list for this job
+sleep 2
+./singatool view $job_id 1>$workspace/job.pids || exit
+echo Generated pid list at $workspace/job.pids
 wait

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/bin/singa-stop.sh
----------------------------------------------------------------------
diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh
index dd3e7bf..ff67f32 100755
--- a/bin/singa-stop.sh
+++ b/bin/singa-stop.sh
@@ -23,15 +23,6 @@
 # clean up singa processes and zookeeper metadata
 #
 
-# usage="Usage: \n \
-#       (local process): singa-stop.sh \n \
-#       (distributed): singa-stop.sh HOST_FILE"
-# 
-# if [ $# -gt 1 ]; then
-#   echo -e $usage
-#   exit 1
-# fi
-
 # get environment variables
 . `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
 cd $SINGA_HOME
@@ -44,7 +35,7 @@ ssh_options="-oStrictHostKeyChecking=no \
 hosts=`cat $host_file | cut -d ' ' -f 1`
 singa_kill="killall -s SIGKILL -r singa"
 for i in ${hosts[@]}; do
-  echo kill singa @ $i ...
+  echo Kill singa @ $i ...
   if [ $i == localhost ]; then
     $singa_kill
   else
@@ -55,6 +46,5 @@ done
 sleep 2
 
 # remove zk data
-# singatool need global conf under SINGA_HOME
-echo cleanning metadata in zookeeper ...
+echo Cleaning metadata in zookeeper ...
 ./singatool cleanup || exit 1

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/conf/singa.conf
----------------------------------------------------------------------
diff --git a/conf/singa.conf b/conf/singa.conf
index f6c351b..fad37d5 100644
--- a/conf/singa.conf
+++ b/conf/singa.conf
@@ -2,4 +2,4 @@
 zookeeper_host: "localhost:2181"
 
 # set if you want to change log directory
-# log_dir: "/tmp/singa-log/"
+log_dir: "/tmp/singa-log/"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/cifar10/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/cifar10/Makefile.example b/examples/cifar10/Makefile.example
index aa9bf6f..5d3fa21 100644
--- a/examples/cifar10/Makefile.example
+++ b/examples/cifar10/Makefile.example
@@ -9,9 +9,9 @@ cifar-10-binary-bin:
 	tar xf cifar-10-binary.tar.gz
 
 create:
-	$(CXX) create_shard.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
-		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
-		-o create_shard.bin
+	$(CXX) create_shard.cc -std=c++11 -lsinga -lprotobuf -lglog -lzookeeper_mt \
+		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
+		-Wl,-rpath=../../.libs/  -o create_shard.bin
 	mkdir cifar10_train_shard
 	mkdir cifar10_test_shard
 	./create_shard.bin cifar-10-batches-bin .

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/cifar10/cluster.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/cluster.conf b/examples/cifar10/cluster.conf
deleted file mode 100644
index e7e3400..0000000
--- a/examples/cifar10/cluster.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-nworker_groups: 1
-nserver_groups: 1
-nservers_per_group: 1
-nworkers_per_group: 1
-nworkers_per_procs: 1
-nservers_per_procs: 1
-workspace: "examples/cifar10/"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/cifar10/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/create_shard.cc b/examples/cifar10/create_shard.cc
index 9df92d6..5b8c245 100644
--- a/examples/cifar10/create_shard.cc
+++ b/examples/cifar10/create_shard.cc
@@ -16,10 +16,7 @@
 #include <cstdint>
 #include <iostream>
 
-#include "utils/data_shard.h"
-#include "utils/common.h"
-#include "proto/model.pb.h"
-#include "proto/common.pb.h"
+#include "singa.h"
 
 using std::string;
 

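The example Makefiles in this commit gain -lzookeeper_mt because create_shard.cc
now includes the umbrella header singa.h, which transitively pulls in trainer
and cluster code that uses zookeeper. A minimal sketch of what such an example
tool needs after this change (illustrative only, not part of the patch):

    // compile roughly as in Makefile.example:
    //   g++ tool.cc -std=c++11 -lsinga -lprotobuf -lglog -lzookeeper_mt ...
    #include "singa.h"

    int main() {
      singa::JobProto jobConf;  // proto types are visible through singa.h
      return 0;
    }
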
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/cifar10/job.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/job.conf b/examples/cifar10/job.conf
new file mode 100644
index 0000000..f3b21b8
--- /dev/null
+++ b/examples/cifar10/job.conf
@@ -0,0 +1,228 @@
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+}
+
+model {
+  name: "cifar10-convnet"
+  train_steps: 1000
+  test_steps: 100
+  test_frequency:300
+  display_frequency:30
+  alg: kBackPropagation
+  updater{
+    weight_decay:0.004
+    lr_change: kFixedStep
+    type: kSGD
+    fixedstep_conf:{
+      step:0
+      step:60000
+      step:65000
+      step_lr:0.001
+      step_lr:0.0001
+      step_lr:0.00001
+    }
+  }
+  neuralnet {
+  layer{
+    name: "data"
+    type: kShardData
+    sharddata_conf {
+      path: "examples/cifar10/cifar10_train_shard"
+      batchsize: 16
+      random_skip: 5000
+    }
+    exclude: kTest
+  }
+  layer{
+    name: "data"
+    type: kShardData
+    sharddata_conf {
+      path: "examples/cifar10/cifar10_test_shard"
+      batchsize: 100
+    }
+    exclude: kTrain
+  }
+  layer{
+    name:"rgb"
+    type: kRGBImage
+    srclayers: "data"
+    rgbimage_conf {
+      meanfile: "examples/cifar10/image_mean.bin"
+    }
+  }
+  layer{
+    name: "label"
+    type: kLabel
+    srclayers: "data"
+  }
+
+  layer {
+    name: "conv1"
+    type: kConvolution
+    srclayers: "rgb"
+    convolution_conf {
+      num_filters: 32
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w1"
+      init_method:kGaussian
+      std:0.0001
+      learning_rate_multiplier:1.0
+    }
+    param {
+      name: "b1"
+      init_method: kConstant
+      learning_rate_multiplier:2.0
+      value:0
+    }
+  }
+
+  layer {
+    name: "pool1"
+    type: kPooling
+    srclayers: "conv1"
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    name: "relu1"
+    type: kReLU
+    srclayers:"pool1"
+  }
+  layer {
+    name: "norm1"
+    type: kLRN
+    lrn_conf {
+      norm_region: WITHIN_CHANNEL
+      local_size: 3
+      alpha: 5e-05
+      beta: 0.75
+    }
+    srclayers:"relu1"
+  }
+  layer {
+    name: "conv2"
+    type: kConvolution
+    srclayers: "norm1"
+    convolution_conf {
+      num_filters: 32
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w2"
+      init_method:kGaussian
+      std:0.01
+      learning_rate_multiplier:1.0
+    }
+    param {
+      name: "b2"
+      init_method: kConstant
+      learning_rate_multiplier:2.0
+      value:0
+    }
+  }
+  layer {
+    name: "relu2"
+    type: kReLU
+    srclayers:"conv2"
+  }
+  layer {
+    name: "pool2"
+    type: kPooling
+    srclayers: "relu2"
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    name: "norm2"
+    type: kLRN
+    lrn_conf {
+      norm_region: WITHIN_CHANNEL
+      local_size: 3
+      alpha: 5e-05
+      beta: 0.75
+    }
+    srclayers:"pool2"
+  }
+  layer {
+    name: "conv3"
+    type: kConvolution
+    srclayers: "norm2"
+    convolution_conf {
+      num_filters: 64
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w3"
+      init_method:kGaussian
+      std:0.01
+    }
+    param {
+      name: "b3"
+      init_method: kConstant
+      value:0
+    }
+  }
+  layer {
+    name: "relu3"
+    type: kReLU
+    srclayers:"conv3"
+  }
+  layer {
+    name: "pool3"
+    type: kPooling
+    srclayers: "relu3"
+    pooling_conf {
+      pool: AVE
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    name: "ip1"
+    type: kInnerProduct
+    srclayers:"pool3"
+    innerproduct_conf {
+      num_output: 10
+    }
+    param {
+      name: "w4"
+      init_method:kGaussian
+      std:0.01
+      learning_rate_multiplier:1.0
+      weight_decay_multiplier:250
+    }
+    param {
+      name: "b4"
+      init_method: kConstant
+      learning_rate_multiplier:2.0
+      weight_decay_multiplier:0
+      value:0
+    }
+  }
+
+  layer{
+    name: "loss"
+    type: kSoftmaxLoss
+    softmaxloss_conf{
+      topk:1
+    }
+    srclayers:"ip1"
+    srclayers: "label"
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/cifar10/model.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/model.conf b/examples/cifar10/model.conf
deleted file mode 100644
index f0e677c..0000000
--- a/examples/cifar10/model.conf
+++ /dev/null
@@ -1,222 +0,0 @@
-name: "cifar10-convnet"
-train_steps: 1000
-test_steps: 100
-test_frequency:300
-display_frequency:30
-#checkpoint: "examples/cifar10/checkpoint/step2200-worker0.bin"
-alg: kBackPropagation
-updater{
-  weight_decay:0.004
-  lr_change: kFixedStep
-  type: kSGD
-  fixedstep_conf:{
-    step:0
-    step:60000
-    step:65000
-    step_lr:0.001
-    step_lr:0.0001
-    step_lr:0.00001
-  }
-}
-neuralnet {
-layer{
-  name: "data"
-  type: kShardData
-  sharddata_conf {
-    path: "examples/cifar10/cifar10_train_shard"
-    batchsize: 16
-    random_skip: 5000
-  }
-  exclude: kTest
-}
-layer{
-  name: "data"
-  type: kShardData
-  sharddata_conf {
-    path: "examples/cifar10/cifar10_test_shard"
-    batchsize: 100
-  }
-  exclude: kTrain
-}
-layer{
-  name:"rgb"
-  type: kRGBImage
-  srclayers: "data"
-  rgbimage_conf {
-    meanfile: "examples/cifar10/image_mean.bin"
-  }
-}
-layer{
-  name: "label"
-  type: kLabel
-  srclayers: "data"
-}
-
-layer {
-  name: "conv1"
-  type: kConvolution
-  srclayers: "rgb"
-  convolution_conf {
-    num_filters: 32
-    kernel: 5
-    stride: 1
-    pad:2
-  }
-  param {
-    name: "w1"
-    init_method:kGaussian
-    std:0.0001
-    learning_rate_multiplier:1.0
-  }
-  param {
-    name: "b1"
-    init_method: kConstant
-    learning_rate_multiplier:2.0
-    value:0
-  }
-}
-
-layer {
-  name: "pool1"
-  type: kPooling
-  srclayers: "conv1"
-  pooling_conf {
-    pool: MAX
-    kernel: 3
-    stride: 2
-  }
-}
-layer {
-  name: "relu1"
-  type: kReLU
-  srclayers:"pool1"
-}
-layer {
-  name: "norm1"
-  type: kLRN
-  lrn_conf {
-    norm_region: WITHIN_CHANNEL
-    local_size: 3
-    alpha: 5e-05
-    beta: 0.75
-  }
-  srclayers:"relu1"
-}
-layer {
-  name: "conv2"
-  type: kConvolution
-  srclayers: "norm1"
-  convolution_conf {
-    num_filters: 32
-    kernel: 5
-    stride: 1
-    pad:2
-  }
-  param {
-    name: "w2"
-    init_method:kGaussian
-    std:0.01
-    learning_rate_multiplier:1.0
-  }
-  param {
-    name: "b2"
-    init_method: kConstant
-    learning_rate_multiplier:2.0
-    value:0
-  }
-}
-layer {
-  name: "relu2"
-  type: kReLU
-  srclayers:"conv2"
-}
-layer {
-  name: "pool2"
-  type: kPooling
-  srclayers: "relu2"
-  pooling_conf {
-    pool: MAX
-    kernel: 3
-    stride: 2
-  }
-}
-layer {
-  name: "norm2"
-  type: kLRN
-  lrn_conf {
-    norm_region: WITHIN_CHANNEL
-    local_size: 3
-    alpha: 5e-05
-    beta: 0.75
-  }
-  srclayers:"pool2"
-}
-layer {
-  name: "conv3"
-  type: kConvolution
-  srclayers: "norm2"
-  convolution_conf {
-    num_filters: 64
-    kernel: 5
-    stride: 1
-    pad:2
-  }
-  param {
-    name: "w3"
-    init_method:kGaussian
-    std:0.01
-  }
-  param {
-    name: "b3"
-    init_method: kConstant
-    value:0
-  }
-}
-layer {
-  name: "relu3"
-  type: kReLU
-  srclayers:"conv3"
-}
-layer {
-  name: "pool3"
-  type: kPooling
-  srclayers: "relu3"
-  pooling_conf {
-    pool: AVE
-    kernel: 3
-    stride: 2
-  }
-}
-layer {
-  name: "ip1"
-  type: kInnerProduct
-  srclayers:"pool3"
-  innerproduct_conf {
-    num_output: 10
-  }
-  param {
-    name: "w4"
-    init_method:kGaussian
-    std:0.01
-    learning_rate_multiplier:1.0
-    weight_decay_multiplier:250
-  }
-  param {
-    name: "b4"
-    init_method: kConstant
-    learning_rate_multiplier:2.0
-    weight_decay_multiplier:0
-    value:0
-  }
-}
-
-layer{
-  name: "loss"
-  type: kSoftmaxLoss
-  softmaxloss_conf{
-    topk:1
-  }
-  srclayers:"ip1"
-  srclayers: "label"
-}
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/mnist/Makefile.example b/examples/mnist/Makefile.example
index 9016887..87399fe 100644
--- a/examples/mnist/Makefile.example
+++ b/examples/mnist/Makefile.example
@@ -13,7 +13,7 @@ mnist:
 	gunzip t10k-images-idx3-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz
 
 create:
-	$(CXX) create_shard.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
+	$(CXX) create_shard.cc -std=c++11 -lsinga -lprotobuf -lglog -lzookeeper_mt -I../../include \
 		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
 		-o create_shard.bin
 	mkdir mnist_train_shard

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/batch.sh
----------------------------------------------------------------------
diff --git a/examples/mnist/batch.sh b/examples/mnist/batch.sh
deleted file mode 100644
index ff6b8b4..0000000
--- a/examples/mnist/batch.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-for nservers in 1
-do
-  for nthreads in 2 4
-  do
-    for nworkers in 1 2 4 8 16
-    do
-      echo "nworkers: $nworkers" >examples/mnist/cluster.conf
-      echo "nservers: $nservers" >>examples/mnist/cluster.conf
-      echo "nthreads_per_server: $nthreads" >>examples/mnist/cluster.conf
-      echo  "workspace:\" /data1/wangwei/singa\"">>examples/mnist/cluster.conf
-      cat examples/mnist/cluster.conf
-      nprocs=$(($nworkers+$nservers))
-      log=log1k/${nworkers}w${nservers}s${nthreads}t
-      echo  $log $nprocs
-      ./examples/mnist/run.sh start $nprocs >$log 2>&1
-      sleep 4
-
-      while true
-      do
-        nstopped=0
-        to=$(($nprocs-1))
-        for worker in $(eval echo "{0..$to}")
-        do
-          if [ ! -e /home/wangwei/program/singa/$worker.lock ]
-          then
-            echo "$worker.lock is free"
-            nstopped=$(($nstopped+1))
-          fi
-        done
-        if [ $nstopped -eq $(($nprocs)) ]
-        then
-          break
-        else
-          sleep 5
-        fi
-      done
-    done
-  done
-done

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/cluster.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/cluster.conf b/examples/mnist/cluster.conf
deleted file mode 100644
index ff25b8c..0000000
--- a/examples/mnist/cluster.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-nworker_groups: 1
-nserver_groups: 1
-nservers_per_group: 1
-nworkers_per_group: 1
-nservers_per_procs: 1
-nworkers_per_procs: 1
-workspace: "examples/mnist/"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/conv.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/conv.conf b/examples/mnist/conv.conf
index 5f3bf58..0bd014d 100644
--- a/examples/mnist/conv.conf
+++ b/examples/mnist/conv.conf
@@ -1,3 +1,13 @@
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+  nservers_per_group: 1
+  nworkers_per_group: 1
+  nservers_per_procs: 1
+  nworkers_per_procs: 1
+  workspace: "examples/mnist/"
+}
+model {
 name: "mnist-conv"
 train_steps: 10000
 test_steps:100
@@ -173,3 +183,4 @@ layer{
   srclayers:"label"
 }
 }
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/job.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/job.conf b/examples/mnist/job.conf
new file mode 100644
index 0000000..ca54f92
--- /dev/null
+++ b/examples/mnist/job.conf
@@ -0,0 +1,230 @@
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+}
+model {
+  name: "deep-big-simple-mlp"
+  train_steps: 1000
+  test_steps:10
+  test_frequency:60
+  display_frequency:30
+  alg: kBackPropagation
+  updater{
+    base_lr: 0.001
+    lr_change: kStep
+    type: kSGD
+    step_conf{
+      change_freq: 60
+      gamma: 0.997
+    }
+  }
+
+  neuralnet {
+  layer {
+    name: "data"
+    type: kShardData
+    sharddata_conf {
+      path: "examples/mnist/mnist_train_shard"
+      batchsize: 1000
+    }
+    exclude: kTest
+  }
+
+  layer {
+    name: "data"
+    type: kShardData
+    sharddata_conf {
+      path: "examples/mnist/mnist_test_shard"
+      batchsize: 1000
+    }
+    exclude: kTrain
+  }
+
+  layer{
+    name:"mnist"
+    type: kMnist
+    srclayers: "data"
+    mnist_conf {
+      # sigma: 6
+      # alpha: 38
+      # gamma: 15
+      # kernel: 21
+      # elastic_freq: 100
+      # beta: 15
+      # resize: 29
+      norm_a: 127.5
+      norm_b: 1
+    }
+  }
+
+
+  layer{
+    name: "label"
+    type: kLabel
+    srclayers: "data"
+  }
+
+  layer{
+    name: "fc1"
+    type: kInnerProduct
+    srclayers:"mnist"
+    innerproduct_conf{
+      num_output: 2500
+    }
+    param{
+      name: "w1"
+      init_method: kUniform
+      low:-0.05
+      high:0.05
+    }
+    param{
+      name: "b1"
+      init_method: kUniform
+      low: -0.05
+      high:0.05
+    }
+  }
+
+  layer{
+    name: "tanh1"
+    type: kTanh
+    srclayers:"fc1"
+  }
+  layer{
+    name: "fc2"
+    type: kInnerProduct
+    srclayers:"tanh1"
+    innerproduct_conf{
+      num_output: 2000
+    }
+    param{
+      name: "w2"
+      init_method: kUniform
+      low:-0.05
+      high:0.05
+    }
+    param{
+      name: "b2"
+      init_method: kUniform
+      low: -0.05
+      high:0.05
+    }
+  }
+
+  layer{
+    name: "tanh2"
+    type: kTanh
+    srclayers:"fc2"
+  }
+  layer{
+    name: "fc3"
+    type:  kInnerProduct
+    srclayers:"tanh2"
+    innerproduct_conf{
+      num_output: 1500
+    }
+    param{
+      name: "w3"
+      init_method: kUniform
+      low:-0.05
+      high:0.05
+    }
+    param{
+      name: "b3"
+      init_method: kUniform
+      low: -0.05
+      high:0.05
+    }
+
+  }
+
+  layer{
+    name: "tanh3"
+    type: kTanh
+    srclayers:"fc3"
+  }
+  layer{
+    name: "fc4"
+    type: kInnerProduct
+    srclayers:"tanh3"
+    innerproduct_conf{
+      num_output: 1000
+    }
+    param{
+      name: "w4"
+      init_method: kUniform
+      low:-0.05
+      high:0.05
+    }
+    param{
+      name: "b4"
+      init_method: kUniform
+      low: -0.05
+      high:0.05
+    }
+
+  }
+
+  layer{
+    name: "tanh4"
+    type: kTanh
+    srclayers:"fc4"
+  }
+  layer{
+    name: "fc5"
+    type: kInnerProduct
+    srclayers:"tanh4"
+    innerproduct_conf{
+      num_output: 500
+    }
+    param{
+      name: "w5"
+      init_method: kUniform
+      low:-0.05
+      high:0.05
+    }
+    param{
+      name: "b5"
+      init_method: kUniform
+      low: -0.05
+      high:0.05
+    }
+
+  }
+
+  layer{
+    name: "tanh5"
+    type: kTanh
+    srclayers:"fc5"
+  }
+  layer{
+    name: "fc6"
+    type: kInnerProduct
+    srclayers:"tanh5"
+    innerproduct_conf{
+      num_output: 10
+    }
+    param{
+      name: "w6"
+      init_method: kUniform
+      low:-0.05
+      high:0.05
+    }
+    param{
+      name: "b6"
+      init_method: kUniform
+      low: -0.05
+      high:0.05
+    }
+  }
+  layer{
+    name: "loss"
+    type:kSoftmaxLoss
+    softmaxloss_conf{
+      topk:1
+    }
+    srclayers:"fc6"
+    srclayers:"label"
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/model.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/model.conf b/examples/mnist/model.conf
deleted file mode 100644
index 4b704bc..0000000
--- a/examples/mnist/model.conf
+++ /dev/null
@@ -1,224 +0,0 @@
-name: "deep-big-simple-mlp"
-train_steps: 1000
-test_steps:10
-test_frequency:60
-display_frequency:30
-alg: kBackPropagation
-updater{
-  base_lr: 0.001
-  lr_change: kStep
-  type: kSGD
-  step_conf{
-    change_freq: 60
-    gamma: 0.997
-  }
-}
-
-neuralnet {
-layer {
-  name: "data"
-  type: kShardData
-  sharddata_conf {
-    path: "examples/mnist/mnist_train_shard"
-    batchsize: 1000
-  }
-  exclude: kTest
-}
-
-layer {
-  name: "data"
-  type: kShardData
-  sharddata_conf {
-    path: "examples/mnist/mnist_test_shard"
-    batchsize: 1000
-  }
-  exclude: kTrain
-}
-
-layer{
-  name:"mnist"
-  type: kMnist
-  srclayers: "data"
-  mnist_conf {
-#    sigma: 6
-#    alpha: 38
-#    gamma: 15
-#    kernel: 21
-#    elastic_freq:100
-#    beta:15
-#    resize: 29
-    norm_a: 127.5
-    norm_b: 1
-  }
-}
-
-
-layer{
-  name: "label"
-  type: kLabel
-  srclayers: "data"
-}
-
-layer{
-  name: "fc1"
-  type: kInnerProduct
-  srclayers:"mnist"
-  innerproduct_conf{
-    num_output: 2500
-  }
-  param{
-    name: "w1"
-    init_method: kUniform
-    low:-0.05
-    high:0.05
-  }
-  param{
-    name: "b1"
-    init_method: kUniform
-    low: -0.05
-    high:0.05
-  }
-}
-
-layer{
-  name: "tanh1"
-  type: kTanh
-  srclayers:"fc1"
-}
-layer{
-  name: "fc2"
-  type: kInnerProduct
-  srclayers:"tanh1"
-  innerproduct_conf{
-    num_output: 2000
-  }
-  param{
-    name: "w2"
-    init_method: kUniform
-    low:-0.05
-    high:0.05
-  }
-  param{
-    name: "b2"
-    init_method: kUniform
-    low: -0.05
-    high:0.05
-  }
-}
-
-layer{
-  name: "tanh2"
-  type: kTanh
-  srclayers:"fc2"
-}
-layer{
-  name: "fc3"
-  type:  kInnerProduct
-  srclayers:"tanh2"
-  innerproduct_conf{
-    num_output: 1500
-  }
-  param{
-    name: "w3"
-    init_method: kUniform
-    low:-0.05
-    high:0.05
-  }
-  param{
-    name: "b3"
-    init_method: kUniform
-    low: -0.05
-    high:0.05
-  }
-
-}
-
-layer{
-  name: "tanh3"
-  type: kTanh
-  srclayers:"fc3"
-}
-layer{
-  name: "fc4"
-  type: kInnerProduct
-  srclayers:"tanh3"
-  innerproduct_conf{
-    num_output: 1000
-  }
-  param{
-    name: "w4"
-    init_method: kUniform
-    low:-0.05
-    high:0.05
-  }
-  param{
-    name: "b4"
-    init_method: kUniform
-    low: -0.05
-    high:0.05
-  }
-
-}
-
-layer{
-  name: "tanh4"
-  type: kTanh
-  srclayers:"fc4"
-}
-layer{
-  name: "fc5"
-  type: kInnerProduct
-  srclayers:"tanh4"
-  innerproduct_conf{
-    num_output: 500
-  }
-  param{
-    name: "w5"
-    init_method: kUniform
-    low:-0.05
-    high:0.05
-  }
-  param{
-    name: "b5"
-    init_method: kUniform
-    low: -0.05
-    high:0.05
-  }
-
-}
-
-layer{
-  name: "tanh5"
-  type: kTanh
-  srclayers:"fc5"
-}
-layer{
-  name: "fc6"
-  type: kInnerProduct
-  srclayers:"tanh5"
-  innerproduct_conf{
-    num_output: 10
-  }
-  param{
-    name: "w6"
-    init_method: kUniform
-    low:-0.05
-    high:0.05
-  }
-  param{
-    name: "b6"
-    init_method: kUniform
-    low: -0.05
-    high:0.05
-  }
-}
-layer{
-  name: "loss"
-  type:kSoftmaxLoss
-  softmaxloss_conf{
-    topk:1
-  }
-  srclayers:"fc6"
-  srclayers:"label"
-}
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/examples/mnist/run.sh
----------------------------------------------------------------------
diff --git a/examples/mnist/run.sh b/examples/mnist/run.sh
deleted file mode 100755
index 1b16ca9..0000000
--- a/examples/mnist/run.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-if [ $# -ne 2 ];then
-  echo "Usage: run.sh [start|stop] num_procs"
-  exit
-fi
-
-script_path=`readlink -f $0`
-script_dir=`dirname $script_path`
-example_dir=`dirname $script_dir`
-singa_dir=`dirname $example_dir`
-exec_path=${singa_dir}/build/pm
-host_path=$script_dir/hostfile
-model_path=$script_dir/mlp.conf
-cluster_path=$script_dir/cluster.conf
-ssh_options="-oStrictHostKeyChecking=no \
--oUserKnownHostsFile=/dev/null"
-
-hosts=(`cat $host_path |cut -d ' ' -f 1`)
-params=(`cat $host_path | cut -d ' ' -f 2`)
-if [ $1 == "start" ]
-then
-  rm -rf $singa_dir/log*
-  for (( i=0; i<$2; i++ ))
-  do
-   	cmd="source ~/.bash_profile; touch $singa_dir/$i.lock;\
-      $exec_path  --hostfile=$script_dir/hostfile --procs_id=$i\
-      --model=${modelfile} --cluster=${clusterfile}"
-    echo ${hosts[$i]} $ssh_options  $cmd
-    ssh $ssh_options ${hosts[$i]} $cmd &
-  done
-elif [ $1 == "stop" ]
-then
-  for (( idx=0 ; idx<$2 ; idx++ ))
-  do
-    echo "ssh ${hosts[$idx]} \"kill pm\""
-    ssh $ssh_options ${hosts[$idx]} "killall -q pm"
-    sleep 1
-  done
-fi

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/neuralnet/base_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/base_layer.h b/include/neuralnet/base_layer.h
index 6ae7c50..c00a1c9 100644
--- a/include/neuralnet/base_layer.h
+++ b/include/neuralnet/base_layer.h
@@ -8,7 +8,7 @@
 #include <memory>
 #include <thread>
 
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "proto/common.pb.h"
 #include "utils/param.h"
 #include "utils/common.h"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index 9a79e3f..7f3b256 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -12,7 +12,7 @@
 #include <chrono>
 #include <random>
 
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "utils/data_shard.h"
 #include "neuralnet/base_layer.h"
 
@@ -237,22 +237,6 @@ class ShardDataLayer: public DataLayer{
  private:
   shared_ptr<DataShard> shard_;
 };
-class LMDBDataLayer: public DataLayer{
- public:
-  using Layer::ComputeFeature;
-
-  void Setup(const LayerProto& proto, int npartitions) override;
-  void ComputeFeature(Phase phase, Metric *perf) override;
-  void ConvertCaffeDatumToRecord(const CaffeDatum& datum,
-      SingleLabelImageRecord* record);
-
- private:
-  MDB_env* mdb_env_;
-  MDB_dbi mdb_dbi_;
-  MDB_txn* mdb_txn_;
-  MDB_cursor* mdb_cursor_;
-  MDB_val mdb_key_, mdb_value_;
-};
 
 /**
  * This layer apply Tan function to neuron activations.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/neuralnet.h b/include/neuralnet/neuralnet.h
index 6aec88e..1dbf44a 100644
--- a/include/neuralnet/neuralnet.h
+++ b/include/neuralnet/neuralnet.h
@@ -6,7 +6,7 @@
 #include <memory>
 #include <string>
 
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "neuralnet/layer.h"
 #include "utils/factory.h"
 #include "utils/graph.h"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/neuralnet/optional_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/optional_layer.h b/include/neuralnet/optional_layer.h
new file mode 100644
index 0000000..2cbcdb8
--- /dev/null
+++ b/include/neuralnet/optional_layer.h
@@ -0,0 +1,28 @@
+#ifdef USE_OPTIONAL_LAYER
+#ifndef SINGA_NEURALNET_OPTIONAL_LAYER_
+#define SINGA_NEURALNET_OPTIONAL_LAYER_
+#include "neuralnet/layer.h"
+
+namespace singa {
+
+class LMDBDataLayer: public DataLayer{
+ public:
+  using Layer::ComputeFeature;
+
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ConvertCaffeDatumToRecord(const CaffeDatum& datum,
+      SingleLabelImageRecord* record);
+
+ private:
+  MDB_env* mdb_env_;
+  MDB_dbi mdb_dbi_;
+  MDB_txn* mdb_txn_;
+  MDB_cursor* mdb_cursor_;
+  MDB_val mdb_key_, mdb_value_;
+};
+} /* singa */
+
+#endif  // SINGA_NEURALNET_OPTIONAL_LAYER_
+#endif
+
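
This header takes over the LMDBDataLayer removed from layer.h above, making the
lmdb dependency opt-in. A sketch of consuming it; defining the macro in CXXFLAGS
and linking the lmdb library are assumptions about the intended build:

    // build with e.g. -DUSE_OPTIONAL_LAYER and link lmdb
    #ifdef USE_OPTIONAL_LAYER
    #include "neuralnet/optional_layer.h"
    #endif

    int main() {
    #ifdef USE_OPTIONAL_LAYER
      singa::LMDBDataLayer* layer = nullptr;  // type exists only when enabled
      (void)layer;
    #endif
      return 0;
    }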

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/singa.h
----------------------------------------------------------------------
diff --git a/include/singa.h b/include/singa.h
new file mode 100644
index 0000000..6eca6c4
--- /dev/null
+++ b/include/singa.h
@@ -0,0 +1,32 @@
+#ifndef SINGA_SINGA_H_
+#define SINGA_SINGA_H_
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "utils/common.h"
+#include "proto/job.pb.h"
+#include "proto/singa.pb.h"
+
+#include "utils/param.h"
+#include "utils/singleton.h"
+#include "utils/factory.h"
+
+#include "neuralnet/neuralnet.h"
+#include "trainer/trainer.h"
+#include "communication/socket.h"
+
+DEFINE_string(singa_conf, "conf/singa.conf", "Global config file");
+
+namespace singa {
+void SubmitJob(int job, bool resume, const JobProto& jobConf) {
+  SingaProto singaConf;
+  ReadProtoFromTextFile(FLAGS_singa_conf.c_str(), &singaConf);
+  if (singaConf.has_log_dir())
+    SetupLog(singaConf.log_dir(),
+        std::to_string(job) + "-" + jobConf.model().name());
+  Trainer trainer;
+  trainer.Start(job, resume, jobConf, singaConf);
+}
+} /* singa */
+#endif  //  SINGA_SINGA_H_
+
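
With this header a custom driver shrinks to parsing a job.conf and calling
SubmitJob. A minimal sketch of such a driver (the conf path and job id are
placeholders; only functions declared in this patch are used):

    #include "singa.h"

    int main(int argc, char** argv) {
      google::InitGoogleLogging(argv[0]);
      gflags::ParseCommandLineFlags(&argc, &argv, true);
      singa::JobProto jobConf;
      // placeholder path; singa-run.sh normally derives it from -workspace
      singa::ReadProtoFromTextFile("examples/cifar10/job.conf", &jobConf);
      singa::SubmitJob(/*job=*/0, /*resume=*/false, jobConf);
      return 0;
    }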

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/trainer/server.h
----------------------------------------------------------------------
diff --git a/include/trainer/server.h b/include/trainer/server.h
index 7fb60c4..ef6e599 100644
--- a/include/trainer/server.h
+++ b/include/trainer/server.h
@@ -4,7 +4,7 @@
 #include <unordered_map>
 #include <utils/param.h>
 #include <utils/updater.h>
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "communication/socket.h"
 
 namespace singa {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/trainer/trainer.h
----------------------------------------------------------------------
diff --git a/include/trainer/trainer.h b/include/trainer/trainer.h
index 9f47ccd..911a4c4 100644
--- a/include/trainer/trainer.h
+++ b/include/trainer/trainer.h
@@ -2,8 +2,8 @@
 #define INCLUDE_TRAINER_TRAINER_H_
 #include <unordered_map>
 #include <queue>
-#include "proto/cluster.pb.h"
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
+#include "proto/singa.pb.h"
 #include "utils/param.h"
 #include "utils/singleton.h"
 #include "utils/factory.h"
@@ -25,17 +25,16 @@ class Trainer{
   ~Trainer();
   /**
   * Entrance function which constructs the workers and servers, and launches
-   * one thread per worker/server. TODO rename variables about cluster config,
-   * job config, etc.
+   * one thread per worker/server.
    *
-   * @param resume if true resume the training from the latest checkpoint files
    * @param job job ID
-   * @param mconf model configuration
-   * @param globalconf global singa configuration
-   * @param cconf cluster configuration
+   * @param resume if true resume the training from the latest checkpoint files
+   * @param jobConf job configuration, including cluster and model configuration
+   * @param singaConf global singa configuration including zookeeper and
+   * log dir settings.
    */
-  void Start(ModelProto& mconf, const GlobalProto& gconf,
-             const ClusterProto& cconf, int job, bool resume);
+  void Start(int job, bool resume,
+      const JobProto& jobConf, const SingaProto& singaConf);
 
  protected:
   /**
@@ -45,27 +44,27 @@ class Trainer{
    * checkpoint, which will be added into the checkpoint field. The workers
    * would then load the values of params from the checkpoint files.
    *
-   * @param model_conf model configuration
+   * @param modelConf model configuration
    */
-  void Resume(ModelProto& model_conf);
+  void Resume(ModelProto* modelConf);
   /**
    * Create server instances.
    * @param nthread total num of threads in current procs which is used to
    * assign each thread a local thread ID. The number of workers is extracted
    * from Cluster
-   * @param model_conf
+   * @param modelConf
    * @return server instances
    */
-  vector<Server*> CreateServers(int nthread, const ModelProto& mproto);
+  vector<Server*> CreateServers(int nthread, const ModelProto& modelConf);
   /**
    * Create workers instances.
    * @param nthread total num of threads in current procs which is used to
    * assign each thread a local thread ID. The number of workers is extracted
    * from Cluster
-   * @param model_conf
+   * @param modelConf
    * @return worker instances
    */
-  vector<Worker*> CreateWorkers(int nthread, const ModelProto& mproto);
+  vector<Worker*> CreateWorkers(int nthread, const ModelProto& modelConf);
 
   /**
    * Setup workers and servers.
@@ -73,12 +72,12 @@ class Trainer{
    * For each worker, create and assign a neuralnet to it.
    * For each server, create and assign the param shard to it.
    * Create the partition map from slice ID to server
-   * @param model_conf
+   * @param modelConf
    * @param workers
    * @param servers
    */
   void SetupWorkerServer(
-    const ModelProto& model_conf,
+    const ModelProto& modelConf,
     const vector<Worker*>& workers,
     const vector<Server*>& servers);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/trainer/worker.h
----------------------------------------------------------------------
diff --git a/include/trainer/worker.h b/include/trainer/worker.h
index 9bda99c..db8cac3 100644
--- a/include/trainer/worker.h
+++ b/include/trainer/worker.h
@@ -1,7 +1,7 @@
 #ifndef SINGA_TRAINER_WORKER_H_
 #define SINGA_TRAINER_WORKER_H_
 #include "neuralnet/neuralnet.h"
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "utils/updater.h"
 #include "communication/socket.h"
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/utils/cluster.h
----------------------------------------------------------------------
diff --git a/include/utils/cluster.h b/include/utils/cluster.h
index b48c1c5..e2c979d 100644
--- a/include/utils/cluster.h
+++ b/include/utils/cluster.h
@@ -7,7 +7,8 @@
 #include <vector>
 #include <unordered_map>
 #include "utils/common.h"
-#include "proto/cluster.pb.h"
+#include "proto/job.pb.h"
+#include "proto/singa.pb.h"
 #include "utils/cluster_rt.h"
 
 using std::shared_ptr;
@@ -24,8 +25,8 @@ namespace singa {
 class Cluster {
  public:
   static shared_ptr<Cluster> Get();
-  static shared_ptr<Cluster> Get(const GlobalProto& global, 
-                                 const ClusterProto& cluster, int job_id);
+  static shared_ptr<Cluster> Get(int job_id,
+      const SingaProto& singaConf, const ClusterProto& clusterConf);
 
   const int nserver_groups()const{ return cluster_.nserver_groups(); }
   const int nworker_groups()const { return cluster_.nworker_groups(); }
@@ -59,7 +60,7 @@ class Cluster {
   /**
    * @return global procs id, which starts from 0.
    */
-  int procs_id()const {return procs_id_;}
+  int procs_id() const {return procs_id_;}
   void set_procs_id(int procs_id) {procs_id_ = procs_id;}
   bool server_worker_separate() const {
     return cluster_.server_worker_separate();
@@ -88,6 +89,7 @@ class Cluster {
   const string checkpoint_folder() const {
     return cluster_.workspace()+"/checkpoint";
   }
+  /*
   const int stub_timeout() const {
     return cluster_.stub_timeout();
   }
@@ -97,6 +99,7 @@ class Cluster {
   const int server_timeout() const {
     return cluster_.server_timeout();
   }
+  */
 
   const bool server_update() const {
     return cluster_.server_update();
@@ -121,14 +124,24 @@ class Cluster {
     return cluster_rt_;
   }
 
+  /**
+   * @return logical procs ID
+   */
   int ProcsIDOf(int group_id, int id, int flag);
   const string hostip() const {
     return hostip_;
   }
-  void Register(const string& endpoint, int pid);
+  /**
+   * Register this process.
+   *
+   * @param pid physical process id obtained from the OS; all other procs IDs
+   * refer to logical process IDs.
+   * @param endpoint unique string for other procs to connect to
+   */
+  void Register(int pid, const string& endpoint);
 
  private:
-  Cluster(const GlobalProto& global, const ClusterProto &cluster, int job_id);
+  Cluster(int job, const SingaProto& singaConf, const ClusterProto& clusterConf);
   void SetupFolders(const ClusterProto &cluster);
   int Hash(int gid, int id, int flag);
 
@@ -139,7 +152,7 @@ class Cluster {
   std::vector<std::string> endpoints_;
   // cluster config proto
   ClusterProto cluster_;
-  GlobalProto global_;
+  SingaProto singa_;
   shared_ptr<ClusterRuntime> cluster_rt_;
   // make this class a singlton
   static shared_ptr<Cluster> instance_;
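
A short usage sketch against the accessors declared above (job_id and the
parsed protos are assumed to come from the driver; the endpoint format is
illustrative):

    #include <unistd.h>
    #include <glog/logging.h>
    #include "utils/cluster.h"

    void InitCluster(int job_id, const singa::SingaProto& singaConf,
                     const singa::JobProto& jobConf) {
      // initialize the singleton once with the reordered Get() signature
      auto cluster = singa::Cluster::Get(job_id, singaConf, jobConf.cluster());
      LOG(INFO) << cluster->nworker_groups() << " worker groups, "
                << cluster->nserver_groups() << " server groups";
      // register this OS process so peers can look up its endpoint
      cluster->Register(getpid(), cluster->hostip() + ":6723");
    }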

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/utils/common.h
----------------------------------------------------------------------
diff --git a/include/utils/common.h b/include/utils/common.h
index ef83031..c4ff4eb 100644
--- a/include/utils/common.h
+++ b/include/utils/common.h
@@ -2,6 +2,7 @@
 #define SINGA_UTILS_COMMON_H_
 
 #include <google/protobuf/message.h>
+#include <gflags/gflags.h>
 #include <stdlib.h>
 #include <unordered_map>
 #include <sstream>
@@ -9,6 +10,11 @@
 #include <vector>
 #include "proto/common.pb.h"
 
+#ifndef GFLAGS_GFLAGS_H_
+  namespace gflags = google;
+#endif  // GFLAGS_GFLAGS_H_
+
+
 namespace singa {
 using std::vector;
 
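The guard above covers old gflags releases, which exported their symbols under
namespace google and did not define GFLAGS_GFLAGS_H_; with the alias in place
the rest of the code can write gflags:: uniformly. A minimal sketch:

    #include "utils/common.h"

    int main(int argc, char** argv) {
      // resolves to google::ParseCommandLineFlags on old gflags builds
      gflags::ParseCommandLineFlags(&argc, &argv, true);
      return 0;
    }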

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/utils/param.h
----------------------------------------------------------------------
diff --git a/include/utils/param.h b/include/utils/param.h
index 0273519..18293b6 100644
--- a/include/utils/param.h
+++ b/include/utils/param.h
@@ -2,7 +2,7 @@
 #define INCLUDE_UTILS_PARAM_H_
 #include <vector>
 #include <string>
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "utils/blob.h"
 #include "communication/msg.h"
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/include/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/utils/updater.h b/include/utils/updater.h
index ea6d74a..d2f4dc1 100644
--- a/include/utils/updater.h
+++ b/include/utils/updater.h
@@ -1,6 +1,6 @@
 #ifndef INCLUDE_UTILS_UPDATER_H_
 #define INCLUDE_UTILS_UPDATER_H_
-#include "proto/model.pb.h"
+#include "proto/job.pb.h"
 #include "utils/param.h"
 
 namespace singa{

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/main.cc
----------------------------------------------------------------------
diff --git a/src/main.cc b/src/main.cc
index cda1294..87ab384 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -1,29 +1,19 @@
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-#include "trainer/trainer.h"
-#include "utils/common.h"
-#ifndef GFLAGS_GFLAGS_H_
-  namespace gflags = google;
-#endif  // GFLAGS_GFLAGS_H_
+#include "singa.h"
 /**
  * \file main.cc is the main entry of SINGA, like the driver program for Hadoop.
  *
  * 1. Users register their own implemented classes, e.g., layer, updater, etc.
- * 2. Users prepare the google protobuf object for the model configuration and
- * the cluster configuration.
+ * 2. Users prepare the google protobuf object for the job configuration.
  * 3. Users call trainer to start the training.
  *
  * TODO
- * 1. Add helper functions for users to configure their model and cluster
- * easily, e.g., AddLayer(layer_type, source_layers, meta_data).
+ * 1. Add helper functions for users to configure their model easily,
+ * e.g., AddLayer(layer_type, source_layers, meta_data).
  */
 
-// TODO: re-organize ClusterProto, GlobalProto and ModelProto.
-DEFINE_int32(job, -1, "Unique job ID");
-DEFINE_bool(resume, false, "Resume from checkpoint");
-DEFINE_string(cluster, "examples/mnist/cluster.conf", "Cluster config file");
-DEFINE_string(model, "examples/mnist/conv.conf", "Model config file");
-DEFINE_string(global, "conf/singa.conf", "Global config file");
+DEFINE_int32(job, -1, "Unique job ID generated from singa-run.sh");
+DEFINE_bool(resume, false, "Resume training from the latest checkpoint");
+DEFINE_string(workspace, "./workspace", "Workspace path passed at cmd line");
 
 /**
  * Register layers, and other customizable classes.
@@ -31,7 +21,8 @@ DEFINE_string(global, "conf/singa.conf", "Global config file");
  * If users want to use their own implemented classes, they should register
  * them here. Refer to the Worker::RegisterDefaultClasses()
  */
-void RegisterClasses(const singa::ModelProto& proto) {
+void RegisterClasses() {
+
 }
 
 
@@ -39,20 +30,15 @@ int main(int argc, char **argv) {
   google::InitGoogleLogging(argv[0]);
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
-  singa::ClusterProto cluster;
-  singa::ReadProtoFromTextFile(FLAGS_cluster.c_str(), &cluster);
-  singa::ModelProto model;
-  singa::ReadProtoFromTextFile(FLAGS_model.c_str(), &model);
-  singa::GlobalProto global;
-  singa::ReadProtoFromTextFile(FLAGS_global.c_str(), &global);
-  singa::SetupLog(global.log_dir(), model.name());
-
-  LOG(INFO) << "The cluster config is\n" << cluster.DebugString();
-  LOG(INFO) << "The model config is\n" << model.DebugString();
-  LOG(INFO) << "The global config is\n" << global.DebugString();
+  singa::JobProto jobConf;
+  std::string job_file = FLAGS_workspace + "/job.conf";
+  singa::ReadProtoFromTextFile(job_file.c_str(), &jobConf);
+  CHECK(jobConf.has_cluster());
+  CHECK(jobConf.has_model());
+  if (!jobConf.cluster().has_workspace())
+    jobConf.mutable_cluster()->set_workspace(FLAGS_workspace);
 
-  RegisterClasses(model);
-  singa::Trainer trainer;
-  trainer.Start(model, global, cluster, FLAGS_job, FLAGS_resume);
+  RegisterClasses();
+  singa::SubmitJob(FLAGS_job, FLAGS_resume, jobConf);
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index e965888..3d9def4 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -1,8 +1,6 @@
 #include <glog/logging.h>
 #include <memory>
 #include <algorithm>
-#include <opencv2/highgui/highgui.hpp>
-#include <opencv2/imgproc/imgproc.hpp>
 #include "mshadow/tensor.h"
 #include "mshadow/cxxnet_op.h"
 #include "neuralnet/layer.h"
@@ -231,103 +229,6 @@ void LabelLayer::ParseRecords(Phase phase, const vector<Record>& records,
   CHECK_EQ(rid, blob->shape()[0]);
 }
 
-
-/*********************LMDBDataLayer**********************************/
-void LMDBDataLayer::ComputeFeature(Phase phase, Metric* perf){
-  if(random_skip_){
-    int nskip = rand() % random_skip_;
-    int n=0;
-    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
-          &mdb_value_, MDB_FIRST), MDB_SUCCESS);
-    while (mdb_cursor_get(mdb_cursor_, &mdb_key_,
-          &mdb_value_, MDB_NEXT) == MDB_SUCCESS)
-      n++;
-    LOG(INFO)<<"Random Skip "<<nskip<<" records of total "<<n<<"records";
-    // We have reached the end. Restart from the first.
-    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
-          &mdb_value_, MDB_FIRST), MDB_SUCCESS);
-    for(int i=0;i<nskip;i++){
-      if (mdb_cursor_get(mdb_cursor_, &mdb_key_,
-            &mdb_value_, MDB_NEXT) != MDB_SUCCESS) {
-        // We have reached the end. Restart from the first.
-        DLOG(INFO) << "Restarting data prefetching from start.";
-        CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
-              &mdb_value_, MDB_FIRST), MDB_SUCCESS);
-      }
-    }
-    random_skip_=0;
-  }
-  CaffeDatum datum;
-  for(auto& record: records_){
-    SingleLabelImageRecord* image=record.mutable_image();
-    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
-          &mdb_value_, MDB_GET_CURRENT), MDB_SUCCESS);
-    datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size);
-    ConvertCaffeDatumToRecord(datum, image);
-    if (mdb_cursor_get(mdb_cursor_, &mdb_key_,
-          &mdb_value_, MDB_NEXT) != MDB_SUCCESS) {
-      // We have reached the end. Restart from the first.
-      DLOG(INFO) << "Restarting data prefetching from start.";
-      CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
-            &mdb_value_, MDB_FIRST), MDB_SUCCESS);
-    }
-  }
-}
-
-void LMDBDataLayer::ConvertCaffeDatumToRecord(const CaffeDatum& datum,
-    SingleLabelImageRecord* record){
-  record->set_label(datum.label());
-  record->clear_shape();
-  if(datum.has_channels())
-    record->add_shape(datum.channels());
-  if(datum.has_height())
-    record->add_shape(datum.height());
-  if(datum.has_width())
-    record->add_shape(datum.width());
-  if(datum.has_data())
-    record->set_pixel(datum.data());
-  if(datum.float_data_size()){
-    record->clear_data();
-    for(float x: datum.float_data())
-      record->add_data(x);
-  }
-}
-
-void LMDBDataLayer::Setup(const LayerProto& proto, int npartitions) {
-  Layer::Setup(proto, npartitions);
-  CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed";
-  CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS); // 1TB
-  CHECK_EQ(mdb_env_open(mdb_env_,
-        proto.lmdbdata_conf().path().c_str(),
-        MDB_RDONLY, 0664), MDB_SUCCESS) << "cannot open lmdb "
-    << proto.lmdbdata_conf().path();
-  CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS)
-    << "mdb_txn_begin failed";
-  CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS)
-    << "mdb_open failed";
-  CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS)
-    << "mdb_cursor_open failed";
-  LOG(INFO) << "Opening lmdb " << proto.lmdbdata_conf().path();
-  CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST),
-      MDB_SUCCESS) << "mdb_cursor_get failed";
-
-  if (mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT)
-      != MDB_SUCCESS) {
-    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_,
-          MDB_FIRST), MDB_SUCCESS);
-  }
-  CaffeDatum datum;
-  datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size);
-  SingleLabelImageRecord* record=sample_.mutable_image();
-  ConvertCaffeDatumToRecord(datum, record);
-
-  batchsize_=batchsize();
-  if(partition_dim() == 0)
-    batchsize_ /= npartitions;
-  records_.resize(batchsize_);
-  random_skip_=proto.lmdbdata_conf().random_skip();
-}
-
 /***************** Implementation for LRNLayer *************************/
 void LRNLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
@@ -377,63 +278,29 @@ void MnistLayer::ParseRecords(Phase phase,
   LOG_IF(ERROR, records.size()==0)<<"Empty records to parse";
   int ndim=records.at(0).image().shape_size();
   int inputsize =records.at(0).image().shape(ndim-1);
+  CHECK_EQ(inputsize, blob->shape()[1]);
 
   float* dptr=blob->mutable_cpu_data();
   for(const Record& record: records){
-    // copy from record to cv::Mat
-    cv::Mat input(inputsize, inputsize, CV_32FC1);
     const SingleLabelImageRecord& imagerecord=record.image();
-    if(imagerecord.pixel().size()){
+    if(imagerecord.pixel().size()) {
       string pixel=imagerecord.pixel();
-      for(int i=0,k=0;i<inputsize;i++)
-        for(int j=0;j<inputsize;j++)
+      for(int i = 0, k = 0; i < inputsize; i++) {
+        for(int j = 0; j < inputsize; j++) {
          // NOTE: pixels must be cast to uint8_t before float; a signed
          // char would sign-extend values above 127 and corrupt the data
-          input.at<float>(i,j)=static_cast<float>(static_cast<uint8_t>(pixel[k++]));
-    }else{
-      for(int i=0,k=0;i<inputsize;i++)
-        for(int j=0;j<inputsize;j++)
-          input.at<float>(i,j)=imagerecord.data(k++);
-    }
-    int size=blob->shape()[1];
-    /*
-    cv::Mat resizeMat=input;
-    // affine transform, scaling, rotation and shearing
-    if(gamma_){
-      float r1=rand_real()*2-1;
-      float r2=rand_real()*2-1;
-      int h=static_cast<int>(inputsize*(1.+r1*gamma_/100.0));
-      int w=static_cast<int>(inputsize*(1.+r2*gamma_/100.0));
-      cv::resize(input, resizeMat, cv::Size(h,w));
-    }
-    cv::Mat betaMat=resizeMat;
-    cv::Mat warpmat(2,3, CV_32FC1);
-    warpmat.at<float>(0,0)=1.0;
-    warpmat.at<float>(0,1)=0.0;
-    warpmat.at<float>(0,2)=0.0;
-    warpmat.at<float>(1,0)=0.0;
-    warpmat.at<float>(1,1)=1.0;
-    warpmat.at<float>(1,2)=0.0;
-
-    if(beta_){
-      float r=rand_real()*2-1;
-      if(rand() % 2){ // rotation
-        cv::Point center(resizeMat.rows/2, resizeMat.cols/2);
-        warpmat=cv::getRotationMatrix2D(center, r*beta_, 1.0);
-      }else{
-        //shearing
-        warpmat.at<float>(0,1)=r*beta_/90;
-        if(imagerecord.label()==1 ||imagerecord.label()==7)
-          warpmat.at<float>(0,1)/=2.0;
+          float x = static_cast<float>(static_cast<uint8_t>(pixel[k++]));
+          x = x / norm_a_ - norm_b_;
+          *dptr = x;
+          dptr++;
+        }
       }
-    }
-    cv::warpAffine(resizeMat, betaMat, warpmat, cv::Size(size, size));
-    */
-
-    for(int i=0;i<size;i++){
-      for(int j=0;j<size;j++){
-        *dptr=input.at<float>(i,j)/norm_a_-norm_b_;
-        dptr++;
+    } else {
+      for(int i = 0, k = 0; i < inputsize; i++) {
+        for(int j = 0; j < inputsize; j++) {
+          *dptr = imagerecord.data(k++) / norm_a_ - norm_b_;
+          dptr++;
+        }
       }
     }
   }
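
The MnistLayer rewrite above drops the commented-out OpenCV affine
transforms and normalizes pixels in place. The standalone sketch below,
not from the commit, shows why the cast chain in the NOTE matters: plain
char is signed on most platforms, so raw pixel values above 127 would be
sign-extended to negatives. The norm_a/norm_b values are illustrative only.

#include <cstdint>
#include <cstdio>
#include <string>

int main() {
  std::string pixel("\xC8", 1);                // one raw pixel, value 200
  float wrong = static_cast<float>(pixel[0]);  // -56: sign-extended char
  float right = static_cast<float>(static_cast<uint8_t>(pixel[0]));  // 200
  const float norm_a = 255.0f, norm_b = 0.0f;  // assumed config values
  std::printf("wrong=%g right=%g normalized=%g\n",
              wrong, right, right / norm_a - norm_b);
  return 0;
}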

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 173f08f..7769e45 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -22,7 +22,6 @@ void NeuralNet::RegisterLayers() {
   RegisterLayer(factory, Dropout);
   RegisterLayer(factory, InnerProduct);
   RegisterLayer(factory, Label);
-  RegisterLayer(factory, LMDBData);
   RegisterLayer(factory, LRN);
   RegisterLayer(factory, Mnist);
   RegisterLayer(factory, Prefetch);
@@ -34,6 +33,10 @@ void NeuralNet::RegisterLayers() {
   RegisterLayer(factory, SoftmaxLoss);
   RegisterLayer(factory, Split);
   RegisterLayer(factory, Tanh);
+
+#ifdef USE_OPTIONAL_LAYER
+  RegisterLayer(factory, LMDBData);
+#endif
 }
 
 shared_ptr<NeuralNet> NeuralNet::Create(
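
The LMDBData layer now registers only when the build enables it. A minimal
sketch of this guard pattern, with hypothetical names rather than SINGA's
actual factory API:

#include <functional>
#include <map>
#include <string>

struct Layer { virtual ~Layer() = default; };
struct MnistLayer : Layer {};
#ifdef USE_OPTIONAL_LAYER
struct LMDBDataLayer : Layer {};  // depends on third-party LMDB
#endif

using LayerFactory = std::map<std::string, std::function<Layer*()>>;

// Core layers register unconditionally; optional ones compile and register
// only when USE_OPTIONAL_LAYER is defined, so a default build needs no
// LMDB headers or libraries.
void RegisterAll(LayerFactory* f) {
  (*f)["Mnist"] = [] { return static_cast<Layer*>(new MnistLayer); };
#ifdef USE_OPTIONAL_LAYER
  (*f)["LMDBData"] = [] { return static_cast<Layer*>(new LMDBDataLayer); };
#endif
}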

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/neuralnet/optional_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/optional_layer.cc b/src/neuralnet/optional_layer.cc
new file mode 100644
index 0000000..ba85807
--- /dev/null
+++ b/src/neuralnet/optional_layer.cc
@@ -0,0 +1,104 @@
+#ifdef USE_OPTIONAL_LAYER
+#include "neuralnet/optional_layer.h"
+namespace singa {
+
+/*********************LMDBDataLayer**********************************/
+void LMDBDataLayer::ComputeFeature(Phase phase, Metric* perf){
+  if(random_skip_){
+    int nskip = rand() % random_skip_;
+    int n=0;
+    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
+          &mdb_value_, MDB_FIRST), MDB_SUCCESS);
+    while (mdb_cursor_get(mdb_cursor_, &mdb_key_,
+          &mdb_value_, MDB_NEXT) == MDB_SUCCESS)
+      n++;
+    LOG(INFO) << "Randomly skip " << nskip << " out of " << n << " records";
+    // We have reached the end. Restart from the first.
+    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
+          &mdb_value_, MDB_FIRST), MDB_SUCCESS);
+    for(int i=0;i<nskip;i++){
+      if (mdb_cursor_get(mdb_cursor_, &mdb_key_,
+            &mdb_value_, MDB_NEXT) != MDB_SUCCESS) {
+        // We have reached the end. Restart from the first.
+        DLOG(INFO) << "Restarting data prefetching from start.";
+        CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
+              &mdb_value_, MDB_FIRST), MDB_SUCCESS);
+      }
+    }
+    random_skip_=0;
+  }
+  CaffeDatum datum;
+  for(auto& record: records_){
+    SingleLabelImageRecord* image=record.mutable_image();
+    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
+          &mdb_value_, MDB_GET_CURRENT), MDB_SUCCESS);
+    datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size);
+    ConvertCaffeDatumToRecord(datum, image);
+    if (mdb_cursor_get(mdb_cursor_, &mdb_key_,
+          &mdb_value_, MDB_NEXT) != MDB_SUCCESS) {
+      // We have reached the end. Restart from the first.
+      DLOG(INFO) << "Restarting data prefetching from start.";
+      CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
+            &mdb_value_, MDB_FIRST), MDB_SUCCESS);
+    }
+  }
+}
+
+void LMDBDataLayer::ConvertCaffeDatumToRecord(const CaffeDatum& datum,
+    SingleLabelImageRecord* record){
+  record->set_label(datum.label());
+  record->clear_shape();
+  if(datum.has_channels())
+    record->add_shape(datum.channels());
+  if(datum.has_height())
+    record->add_shape(datum.height());
+  if(datum.has_width())
+    record->add_shape(datum.width());
+  if(datum.has_data())
+    record->set_pixel(datum.data());
+  if(datum.float_data_size()){
+    record->clear_data();
+    for(float x: datum.float_data())
+      record->add_data(x);
+  }
+}
+
+void LMDBDataLayer::Setup(const LayerProto& proto, int npartitions) {
+  Layer::Setup(proto, npartitions);
+  CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed";
+  CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS); // 1TB
+  CHECK_EQ(mdb_env_open(mdb_env_,
+        proto.lmdbdata_conf().path().c_str(),
+        MDB_RDONLY, 0664), MDB_SUCCESS) << "cannot open lmdb "
+    << proto.lmdbdata_conf().path();
+  CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS)
+    << "mdb_txn_begin failed";
+  CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS)
+    << "mdb_open failed";
+  CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS)
+    << "mdb_cursor_open failed";
+  LOG(INFO) << "Opening lmdb " << proto.lmdbdata_conf().path();
+  CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST),
+      MDB_SUCCESS) << "mdb_cursor_get failed";
+
+  if (mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT)
+      != MDB_SUCCESS) {
+    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_,
+          MDB_FIRST), MDB_SUCCESS);
+  }
+  CaffeDatum datum;
+  datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size);
+  SingleLabelImageRecord* record=sample_.mutable_image();
+  ConvertCaffeDatumToRecord(datum, record);
+
+  batchsize_=batchsize();
+  if(partition_dim() == 0)
+    batchsize_ /= npartitions;
+  records_.resize(batchsize_);
+  random_skip_=proto.lmdbdata_conf().random_skip();
+}
+
+}  // namespace singa
+
+#endif
+
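
The cursor logic moved into optional_layer.cc follows a simple
read-and-wrap pattern. Below is a self-contained sketch of that pattern
against the plain LMDB C API, not taken from the commit; the database path
is a placeholder and error handling is reduced to early returns.

#include <lmdb.h>
#include <cstdio>

int main() {
  MDB_env* env = nullptr;
  MDB_txn* txn = nullptr;
  MDB_cursor* cur = nullptr;
  MDB_dbi dbi;
  MDB_val key, val;

  if (mdb_env_create(&env)) return 1;
  if (mdb_env_open(env, "/path/to/lmdb", MDB_RDONLY, 0664)) return 1;
  if (mdb_txn_begin(env, nullptr, MDB_RDONLY, &txn)) return 1;
  if (mdb_dbi_open(txn, nullptr, 0, &dbi)) return 1;
  if (mdb_cursor_open(txn, dbi, &cur)) return 1;

  if (mdb_cursor_get(cur, &key, &val, MDB_FIRST)) return 1;
  for (int i = 0; i < 10; ++i) {  // read a fixed number of records
    std::printf("record of %zu bytes\n", val.mv_size);
    if (mdb_cursor_get(cur, &key, &val, MDB_NEXT) != MDB_SUCCESS) {
      // reached the end; restart from the first record, as
      // LMDBDataLayer::ComputeFeature() does
      mdb_cursor_get(cur, &key, &val, MDB_FIRST);
    }
  }
  mdb_cursor_close(cur);
  mdb_txn_abort(txn);
  mdb_env_close(env);
  return 0;
}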

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0478e8cf/src/proto/cluster.proto
----------------------------------------------------------------------
diff --git a/src/proto/cluster.proto b/src/proto/cluster.proto
deleted file mode 100644
index 7afb866..0000000
--- a/src/proto/cluster.proto
+++ /dev/null
@@ -1,61 +0,0 @@
-package singa;
-
-message GlobalProto {
-  // ip/hostname:port[,ip/hostname:port]
-  required string zookeeper_host = 1;
-  // if not set, use the default dir of glog
-  optional string log_dir = 2 [default = "/tmp/singa-log/"];
-}
-
-message ClusterProto {
-  optional int32 nworker_groups = 1;
-  optional int32 nserver_groups = 2;
-  optional int32 nworkers_per_group = 3 [default = 1];
-  optional int32 nservers_per_group = 4 [default = 1];
-  optional int32 nworkers_per_procs = 5 [default = 1];
-  optional int32 nservers_per_procs = 6 [default = 1];
-
-  // Used in standalone mode, one ip or hostname per line
-  // For YARN or Mesos version, the processes are allocted dynamically,
-  // hence no need to specify the hosts statically
-  optional string hostfile = 10 [default=""];
-
-  // servers and workers in different processes?
-  optional bool server_worker_separate = 11 [default = false];
-
-  // port number is used by ZeroMQ
-  optional int32 start_port = 13 [default = 6723];
-  // local workspace, train/val/test shards, checkpoint files
-  required string workspace = 14;
-  // relative path to workspace. if not set, use the default dir of glog
-  //optional string log_dir = 15;
-  // ip/hostname : port [, ip/hostname : port]
-  //optional string zookeeper_host = 16 [default = "localhost:2181"];
-  // message size limit, default 1MB
-  // optional int32 largest_message = 20 [default = 1048576];
-  // optional float bandwidth = 21 [default = 100];  // MB/s
-
-	//repeated ServerTopology server_group = 20;
-
-  optional int32 stub_timeout = 30 [default = 5000];
-  optional int32 worker_timeout = 31 [default = 5000];
-  optional int32 server_timeout = 32 [default = 5000];
-
-  // conduct updates at server side; otherwise do it at worker side
-  optional bool server_update = 40 [default = true];
-  // share memory space between worker groups in one procs
-  optional bool share_memory = 41 [default = true];
-
-  // bandwidth of ethernet, Bytes per second, default is 1 Gbps
-  optional int32 bandwidth=50 [default=134217728];
-  // poll time in milliseconds
-  optional int32 poll_time=51 [default =100];
-}
-
-message ServerTopology {
-  // group id
-	required int32 id = 1;
-	optional int32 sync_interval = 2;
-  // neighbor group id
-	repeated int32 neighbor = 3;
-}


[3/3] incubator-singa git commit: SINGA-36 Clean ModelProto, ClusterProto, JobProto and driver program

Posted by wa...@apache.org.
SINGA-36 Clean ModelProto, ClusterProto, JobProto and driver program

Change the singatool glog output:
 * all logs are directed to stderr
 * the minimum log level is ERROR
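
For reference, a short standalone sketch of the glog settings this message
describes (glog severities map to INFO=0, WARNING=1, ERROR=2, FATAL=3):

#include <glog/logging.h>

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = 1;   // write to stderr instead of log files
  FLAGS_minloglevel = 2;   // 2 == ERROR; drops INFO and WARNING
  LOG(INFO) << "suppressed";
  LOG(ERROR) << "printed to stderr";
  return 0;
}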


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c3a248a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c3a248a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c3a248a4

Branch: refs/heads/master
Commit: c3a248a4b66e80e8c7c97778ea576bd09feaa412
Parents: 0478e8c
Author: wang sheng <wa...@gmail.com>
Authored: Thu Jul 23 14:45:33 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Thu Jul 23 14:45:33 2015 +0800

----------------------------------------------------------------------
 src/utils/tool.cc | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c3a248a4/src/utils/tool.cc
----------------------------------------------------------------------
diff --git a/src/utils/tool.cc b/src/utils/tool.cc
index 3ffd0e8..8e35997 100644
--- a/src/utils/tool.cc
+++ b/src/utils/tool.cc
@@ -6,29 +6,30 @@
 #include "proto/singa.pb.h"
 #include "utils/common.h"
 #ifndef GFLAGS_GFLAGS_H_
-  namespace gflags = google;
+namespace gflags = google;
 #endif  // GFLAGS_GFLAGS_H_
 
 DEFINE_string(global, "conf/singa.conf", "Global config file");
 
 int main(int argc, char **argv) {
   google::InitGoogleLogging(argv[0]);
+  // log to stderr and raise the minimum logging level to ERROR
+  FLAGS_logtostderr = 1;
+  FLAGS_minloglevel = 2;
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
   singa::SingaProto global;
   singa::ReadProtoFromTextFile(FLAGS_global.c_str(), &global);
-  singa::SetupLog(global.log_dir(), "SingaTool");
-
   LOG(INFO) << "The global config is \n" << global.DebugString();
 
   singa::JobManager mng(global.zookeeper_host());
   std::string usage = "singatool usage:\n"
-      "./singatool create       :  generate a unique job id\n"
-      "./singatool list         :  list running singa jobs\n"
-      "./singatool view JOB_ID  :  view procs of a singa job\n"
-      "./singatool clean JOB_ID :  clean a job path in zookeeper\n"
-      "./singatool cleanup      :  clean all singa data in zookeeper\n"
-      "./singatool listall      :  list all singa jobs\n";
+      "# ./singatool create       :  generate a unique job id\n"
+      "# ./singatool list         :  list running singa jobs\n"
+      "# ./singatool view JOB_ID  :  view procs of a singa job\n"
+      "# ./singatool clean JOB_ID :  clean a job path in zookeeper\n"
+      "# ./singatool cleanup      :  clean all singa data in zookeeper\n"
+      "# ./singatool listall      :  list all singa jobs\n";
   if (argc <= 1) {
     LOG(ERROR) << usage;
     return 1;