You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/17 18:02:22 UTC

[01/51] [abbrv] incubator-singa git commit: SINGA-235 - Unify the engines for cudnn and singa layers

Repository: incubator-singa
Updated Branches:
  refs/heads/master 1ca8c638b -> f9c6d5c05


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index a87eb10..c8c8c05 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -22,6 +22,12 @@ from . import singa_wrap
 from .proto import model_pb2
 import tensor
 
+# engine could be 'cudnn', 'singa', which is used to create layers.
+# e.g., CudnnConvolution layer is identified by 'cudnn_convolution'
+# Convolution layer is identified by 'singa_convolution'
+# engine is case insensitive
+engine = 'cudnn'
+
 
 class Layer(object):
     """Base Python layer class.
@@ -78,12 +84,31 @@ class Layer(object):
         return tensor.from_raw_tensors(self.layer.param_values())
 
     def forward(self, flag, input):
+        '''Forward propagate through this layer.
+
+        Args:
+            flag, kTrain or kEval
+            input, an input tensor
+
+        Return:
+            a tensor for the transformed feature
+        '''
         assert self.has_setup, 'Must call setup() before forward()'
         assert isinstance(input, tensor.Tensor), 'input must be py Tensor'
         y = self.layer.Forward(flag, input.singa_tensor)
         return tensor.from_raw_tensor(y)
 
     def backward(self, flag, grad):
+        '''Backward propagate through this layer.
+
+        Args:
+            flag, for future use.
+            grad, gradient of the returned values of the forward function.
+
+        Return:
+            <dx, <dp1, dp2..>>, dx is the gradient of the input of the
+            forward function, dpi is the gradient of the i-th parameter
+        '''
         assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor'
         ret = self.layer.Backward(flag, grad.singa_tensor)
         return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1])
@@ -104,7 +129,7 @@ class Layer(object):
 class Conv2D(Layer):
 
     def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
-                 engine='cudnn', cudnn_prefer='fatest', data_format='NCHW',
+                 cudnn_prefer='fatest', data_format='NCHW',
                  use_bias=True, W_specs=None, b_specs=None,
                  pad=None, input_sample_shape=None):
         """Construct a layer for 2D convolution.
@@ -117,8 +142,6 @@ class Conv2D(Layer):
                 'valid' -> padding is 0 for height and width
                 'same' -> padding is half of the kernel (floor),
                     the kernel must be odd number.
-            engine (string): implementation engin, could be 'cudnn'
-                (case insensitive)
             cudnn_prefer (string): the preferred algorithm for cudnn convolution
                 which could be 'fatest', 'autotune', 'limited_workspace' and
                 'no_workspace'
@@ -165,7 +188,7 @@ class Conv2D(Layer):
         self.conf.param.extend([bspecs])
         self.param_specs.append(bspecs)
 
-        _check_engine(engine, ['cudnn'])
+        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, 'Convolution')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -174,7 +197,7 @@ class Conv2D(Layer):
 class Conv1D(Conv2D):
 
     def __init__(self, name, nb_kernels, kernel=3, stride=1,
-                 border_mode='same', engine='cudnn', cudnn_prefer='fatest',
+                 border_mode='same', cudnn_prefer='fatest',
                  use_bias=True, W_specs={'init': 'Xavier'},
                  b_specs={'init': 'Constant', 'value': 0}, pad=None,
                  input_sample_shape=None):
@@ -191,7 +214,7 @@ class Conv1D(Conv2D):
         if input_sample_shape is not None:
             input_sample_shape = (1, 1, input_sample_shape[0])
         super(Conv1D, self).__init__(name, nb_kernels, (1, kernel), (0, stride),
-                                     border_mode, engine, cudnn_prefer,
+                                     border_mode, cudnn_prefer,
                                      use_bias=use_bias, pad=pad,
                                      W_specs=W_specs, b_specs=b_specs,
                                      input_sample_shape=input_sample_shape)
@@ -206,15 +229,14 @@ class Conv1D(Conv2D):
 class Pooling2D(Layer):
 
     def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
-                 pad=None, data_format='NCHW', engine='cudnn',
-                 input_sample_shape=None):
+                 pad=None, data_format='NCHW', input_sample_shape=None):
         super(Pooling2D, self).__init__(name)
         assert data_format == 'NCHW', 'Not supported data format: %s ' \
             'only "NCHW" is enabled currently' % (data_format)
         conf = self.conf.pooling_conf
         conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad)
         conf.pool = mode
-        _check_engine(engine, ['cudnn'])
+        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, 'Pooling')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -223,27 +245,25 @@ class Pooling2D(Layer):
 class MaxPooling2D(Pooling2D):
 
     def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', engine='cudnn', input_sample_shape=None):
+                 data_format='NCHW', input_sample_shape=None):
         super(MaxPooling2D, self).__init__(name, model_pb2.PoolingConf.MAX,
                                            kernel, stride, border_mode,
-                                           pad, data_format, engine,
-                                           input_sample_shape)
+                                           pad, data_format, input_sample_shape)
 
 
 class AvgPooling2D(Pooling2D):
 
     def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', engine='cudnn', input_sample_shape=None):
+                 data_format='NCHW', input_sample_shape=None):
         super(AvgPooling2D, self).__init__(name, model_pb2.PoolingConf.AVE,
                                            kernel, stride, border_mode,
-                                           pad, data_format, engine,
-                                           input_sample_shape)
+                                           pad, data_format, input_sample_shape)
 
 
 class MaxPooling1D(MaxPooling2D):
 
     def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', engine='cudnn', input_sample_shape=None):
+                 data_format='NCHW', input_sample_shape=None):
         """Max pooling for 1D feature.
 
         Args:
@@ -260,8 +280,7 @@ class MaxPooling1D(MaxPooling2D):
             input_sample_shape = None
         super(MaxPooling1D, self).__init__(name, (1, kernel), (0, stride),
                                            border_mode, pad,
-                                           data_format, engine,
-                                           input_sample_shape)
+                                           data_format, input_sample_shape)
 
     def get_output_sample_shape(self):
         shape = self.layer.GetOutputSampleShape()
@@ -271,7 +290,7 @@ class MaxPooling1D(MaxPooling2D):
 class AvgPooling1D(AvgPooling2D):
 
     def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', engine='cudnn', input_sample_shape=None):
+                 data_format='NCHW', input_sample_shape=None):
         """input_feature_length is a scalar value"""
         pad2 = None
         if pad is not None:
@@ -285,8 +304,7 @@ class AvgPooling1D(AvgPooling2D):
 
         super(AvgPooling1D, self).__init__(name, (kernel, 1), (0, stride),
                                            border_mode, pad2,
-                                           data_format, engine,
-                                           input_sample_shape)
+                                           data_format, input_sample_shape)
 
     def get_output_sample_shape(self):
         shape = self.layer.GetOutputSampleShape()
@@ -296,7 +314,7 @@ class AvgPooling1D(AvgPooling2D):
 class BatchNormalization(Layer):
     # TODO(wangwei) add mode and epsilon arguments
 
-    def __init__(self, name, momentum=0.9, engine='cudnn',
+    def __init__(self, name, momentum=0.9,
                  beta_specs=None, gamma_specs=None, input_sample_shape=None):
         """Batch-normalization.
 
@@ -337,16 +355,15 @@ class BatchNormalization(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
         self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
         self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn'])
+        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, 'BatchNorm')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
 
 class LRN(Layer):
-
     def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
-                 k=1, engine='cudnn', input_sample_shape=None):
+                 k=1, input_sample_shape=None):
         """Local response normalization.
 
         Args:
@@ -364,7 +381,7 @@ class LRN(Layer):
         # TODO(wangwei) enable mode = 'within_channel'
         assert mode == 'cross_channel', 'only support mode="across_channel"'
         conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn'])
+        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, 'LRN')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -374,7 +391,7 @@ class Dense(Layer):
 
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
-                 W_transpose=True, engine='cuda', input_sample_shape=None):
+                 W_transpose=True, input_sample_shape=None):
         """Apply linear/affine transformation, also called inner-product or
         fully connected layer.
 
@@ -392,7 +409,6 @@ class Dense(Layer):
                 'regularizer' for regularization, currently support 'l2'
             b_specs (dict): specs for the bias vector, same fields as W_specs.
             W_transpose (bool): if true, output=x*W.T+b;
-            engine (string): could be 'cudnn', 'cuda'
             input_sample_shape (tuple): input feature length
         """
         super(Dense, self).__init__(name)
@@ -412,22 +428,19 @@ class Dense(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(W_specs))
         self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
         self.param_specs.append(_construct_param_specs_from_dict(b_specs))
-        if engine == 'cudnn':
-            engine = 'cuda'
-        _check_engine(engine, ['cuda', 'cpp'])
-        self.layer = _create_layer(engine, 'Dense')
+        # dense layer is transparent to engine.
+        self.layer = _create_layer('singa', 'Dense')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
 
 class Dropout(Layer):
 
-    def __init__(self, name, p=0.5, engine='cuda', input_sample_shape=None):
+    def __init__(self, name, p=0.5, input_sample_shape=None):
         """Droput layer.
 
         Args:
             p (float): probability for dropping out the element, i.e., set to 0
-            engine (string): 'cudnn' for cudnn version>=5; or 'cuda'
             name (string): layer name
         """
         super(Dropout, self).__init__(name)
@@ -436,7 +449,7 @@ class Dropout(Layer):
         # 'cudnn' works for v>=5.0
         #  if engine.lower() == 'cudnn':
         #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'cuda', 'cpp'])
+        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -444,28 +457,25 @@ class Dropout(Layer):
 
 class Activation(Layer):
 
-    def __init__(self, name, mode='relu', engine='cudnn',
-                 input_sample_shape=None):
+    def __init__(self, name, mode='relu', input_sample_shape=None):
         """Activation layers.
 
         Args:
-            engine (string): 'cudnn'
             name (string): layer name
             mode (string): 'relu', 'sigmoid', or 'tanh'
             input_sample_shape (tuple): shape of a single sample
         """
         super(Activation, self).__init__(name)
-        _check_engine(engine, ['cudnn', 'cuda', 'cpp'])
-        mode_dict = {'relu': 'RELU', 'sigmoid': 'SIGMOID', 'tanh': 'TANH'}
-        self.conf.type = mode_dict[mode.lower()]
-        self.layer = _create_layer(engine, 'Activation')
+        self.conf.type = (engine + '_' + mode).lower()
+        _check_engine(engine, ['cudnn', 'singa'])
+        self.layer = _create_layer(engine, mode)
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
 
 class Softmax(Layer):
 
-    def __init__(self, name, axis=1, engine='cudnn', input_sample_shape=None):
+    def __init__(self, name, axis=1, input_sample_shape=None):
         """Apply softmax.
 
         Args:
@@ -476,7 +486,7 @@ class Softmax(Layer):
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'cuda', 'cpp'])
+        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, 'Softmax')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -484,7 +494,7 @@ class Softmax(Layer):
 
 class Flatten(Layer):
 
-    def __init__(self, name, axis=1, engine='cudnn', input_sample_shape=None):
+    def __init__(self, name, axis=1, input_sample_shape=None):
         """Reshape the input tensor into a matrix.
         Args:
             axis (int): reshape the input as a matrix with the dimension
@@ -494,24 +504,39 @@ class Flatten(Layer):
         super(Flatten, self).__init__(name)
         conf = self.conf.flatten_conf
         conf.axis = axis
-        _check_engine(engine, ['cudnn', 'cuda', 'cpp'])
-        if engine == 'cudnn':
-            engine = 'cuda'
-        self.layer = _create_layer(engine, 'Flatten')
+        # fltten layer is transparent to engine
+        self.layer = _create_layer('singa', 'Flatten')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
 
 class RNN(Layer):
-    def __init__(self, name, hidden_size, rnn_mode='lstm', engine='cudnn',
-            dropout=0.0, num_stacks=1, input_mode='linear', bidirectional=False,
-            param_specs=None, input_sample_shape=None):
+    def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
+                 num_stacks=1, input_mode='linear', bidirectional=False,
+                 param_specs=None, input_sample_shape=None):
+        '''Wrapper for singa::RNN class.
+
+        Args:
+            hidden_size, hidden feature size, the same for all stacks of layers.
+            rnn_mode, decides the rnn unit, which could be one of 'lstm', 'gru',
+                'tanh' and 'relu', refer to cudnn manual for each mode.
+            num_stacks, num of stacks of rnn layers. It is different to the
+                unrolling seqence length.
+            input_mode, 'linear' convert the input feature x by by a linear
+                transformation to get a feature vector of size hidden_size;
+                'skip' does nothing but requires the input feature size equals
+                hidden_size
+            bidirection, True for bidirectional RNN
+            param_specs, config for initializing the RNN parameters.
+            input_sample_shape, includes a single integer for the input sample
+                feature size.
+        '''
         super(RNN, self).__init__(name)
         conf = self.conf.rnn_conf
         assert hidden_size > 0, 'Hidden feature size must > 0'
         conf.hidden_size = hidden_size
-        assert rnn_mode in Set(['lstm', 'gru', 'tanh', 'relu']), \
-                'rnn mode %s is not available' %s (rnn_mode)
+        assert rnn_mode in Set(['lstm', 'gru', 'tanh', 'relu']),  \
+            'rnn mode %s is not available' % (rnn_mode)
         conf.rnn_mode = rnn_mode
         conf.num_stacks = num_stacks
         conf.dropout = dropout
@@ -519,10 +544,11 @@ class RNN(Layer):
         conf.direction = 'unidirectional'
         if bidirectional:
             conf.direction = 'bidirectional'
+        # currently only has rnn layer implemented using cudnn
         _check_engine(engine, ['cudnn'])
         if param_specs is None:
             param_specs = {'name': name + '-weight',
-                    'init': 'uniform', 'low':0, 'high':1};
+                           'init': 'uniform', 'low': 0, 'high': 1}
         self.conf.param.extend([_construct_param_specs_from_dict(param_specs)])
         self.param_specs.append(_construct_param_specs_from_dict(param_specs))
 
@@ -531,18 +557,59 @@ class RNN(Layer):
             self.setup(input_sample_shape)
 
     def forward(self, flag, inputs):
+        '''Forward inputs through the RNN.
+
+        Args:
+            flag, kTrain or kEval.
+            inputs, <x1, x2,...xn, hx, cx>, where xi is the input tensor for the
+                i-th position, its shape is (batch_size, input_feature_length);
+                the batch_size of xi must >= that of xi+1; hx is the initial
+                hidden state of shape (num_stacks * bidirection?2:1, batch_size,
+                hidden_size). cx is the initial cell state tensor of the same
+                shape as hy. cx is valid for only lstm. For other RNNs there is
+                no cx. Both hx and cx could be dummy tensors without shape and
+                data.
+
+        Returns:
+            <y1, y2, ... yn, hy, cy>, where yi is the output tensor for the i-th
+                position, its shape is (batch_size,
+                hidden_size * bidirection?2:1). hy is the final hidden state
+                tensor. cx is the final cell state tensor. cx is only used for
+                lstm.
+        '''
         assert self.has_setup, 'Must call setup() before forward()'
         assert len(inputs) > 1, 'The input to RNN must include at '\
-                'least one input tensor '\
-                'and one hidden state tensor (could be a dummy tensor)'
+            'least one input tensor '\
+            'and one hidden state tensor (could be a dummy tensor)'
         tensors = []
         for t in inputs:
-            assert isinstance(t, tensor.Tensor), 'input must be py Tensor %s' % (type(t))
+            assert isinstance(t, tensor.Tensor), \
+                'input must be py Tensor %s' % (type(t))
             tensors.append(t.singa_tensor)
         y = self.layer.Forward(flag, tensors)
         return tensor.from_raw_tensors(y)
 
     def backward(self, flag, grad):
+        '''Backward gradients through the RNN.
+
+        Args:
+            flag, for future use.
+            grad, <dy1, dy2,...dyn, dhy, dcy>, where dyi is the gradient for the
+            i-th output, its shape is (batch_size, hidden_size*bidirection?2:1);
+                dhy is the gradient for the final hidden state, its shape is
+                (num_stacks * bidirection?2:1, batch_size,
+                hidden_size). dcy is the gradient for the final cell state.
+                cx is valid only for lstm. For other RNNs there is
+                no cx. Both dhy and dcy could be dummy tensors without shape and
+                data.
+
+        Returns:
+            <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
+            the i-th input, its shape is (batch_size,
+                input_feature_length). dhx is the gradient for the initial
+                hidden state. dcx is the gradient for the initial cell state,
+                which is valid only for lstm.
+        '''
         tensors = []
         for t in grad:
             assert isinstance(t, tensor.Tensor), 'grad must be py Tensor'
@@ -550,21 +617,23 @@ class RNN(Layer):
         ret = self.layer.Backward(flag, tensors)
         return tensor.from_raw_tensors(ret[0]), tensor.from_raw_tensors(ret[1])
 
+
 class LSTM(RNN):
-    def __init__(self, name, hidden_size, engine='cudnn',
-            dropout=0.0, num_stacks=1, input_mode='linear', bidirectional=False,
-            param_specs=None, input_sample_shape=None):
-        super(LSTM, self).__init__(name, hidden_size,  'lstm', engine, dropout,
-                num_stacks, input_mode, bidirectional, param_specs,
-                input_sample_shape)
+    def __init__(self, name, hidden_size, dropout=0.0, num_stacks=1,
+                 input_mode='linear', bidirectional=False,
+                 param_specs=None, input_sample_shape=None):
+        super(LSTM, self).__init__(name, hidden_size,  'lstm',  dropout,
+                                   num_stacks, input_mode, bidirectional,
+                                   param_specs, input_sample_shape)
+
 
 class GRU(RNN):
-    def __init__(self, name, hidden_size, engine='cudnn',
-            dropout=0.0, num_stacks=1, input_mode='linear', bidirectional=False,
-            param_specs=None, input_sample_shape=None):
-        super(GRU, self).__init__(name,  hidden_size, 'gru', engine, dropout,
-                num_stacks, input_mode, bidirectional, param_specs,
-                input_sample_shape)
+    def __init__(self, name, hidden_size, dropout=0.0, num_stacks=1,
+                 input_mode='linear', bidirectional=False, param_specs=None,
+                 input_sample_shape=None):
+        super(GRU, self).__init__(name,  hidden_size, 'gru',  dropout,
+                                  num_stacks, input_mode, bidirectional,
+                                  param_specs, input_sample_shape)
 
 
 def _check_engine(engine, allowed_engines):
@@ -573,12 +642,17 @@ def _check_engine(engine, allowed_engines):
            (engine, ', '.join(allowed_engines))
 
 
-def _create_layer(engine, layer):
-    if engine == 'cuda' or engine == 'cpp':
-        layer_type = layer
-    else:
-        layer_type = engine.title() + layer
-    return singa_wrap.CreateLayer(layer_type)
+def _create_layer(eng, layer):
+    ''' create singa wrap layer.
+
+    Both arguments are case insensitive.
+    Args:
+        engine, implementation engine, either 'singa' or 'cudnn'
+        layer, layer type, e.g., 'convolution', 'pooling'; for activation
+        layers, use the specific activation mode, e.g. 'relu', 'tanh'.
+    '''
+    layer_type = eng + '_' + layer
+    return singa_wrap.CreateLayer(layer_type.lower())
 
 
 def _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index c0ba61d..1617717 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -92,17 +92,17 @@ class FeedForwardNet(object):
         return tensor.softmax(xx)
 
     def forward(self, flag, x):
-        #print x.l1()
+        # print x.l1()
         for lyr in self.layers:
             x = lyr.forward(flag, x)
         #    print lyr.name, x.l1()
         return x
 
-    def backward(self, flag=kTrain):
+    def backward(self):
         grad = self.loss.backward()
         pgrads = []
         for lyr in reversed(self.layers):
-            grad, _pgrads = lyr.backward(flag, grad)
+            grad, _pgrads = lyr.backward(kTrain, grad)
             for g in reversed(_pgrads):
                 pgrads.append(g)
         return reversed(pgrads)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index 2d6fa5a..6e84a4f 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -39,7 +39,7 @@ class Tensor(object):
             return
         else:
             assert isinstance(shape, tuple), 'shape should be tuple'
-            vs = _tuple_to_vector(shape)
+            vs = list(shape)
             if device is None:
                 self.singa_tensor = singa.Tensor(vs, dtype)
             else:
@@ -111,8 +111,9 @@ class Tensor(object):
         return self.singa_tensor.L1()
 
     def set_value(self, x):
-        if isinstance(x, float):
-            self.singa_tensor.floatSetValue(x)
+        # assert type(x) == float, 'set value only accepts float input'
+        # if isinstance(x, float):
+        self.singa_tensor.floatSetValue(x)
 
     def copy_data(self, t):
         self.singa_tensor.CopyData(t.singa_tensor)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/python/swig/core_device.i
----------------------------------------------------------------------
diff --git a/src/python/swig/core_device.i b/src/python/swig/core_device.i
index b79d37e..a5d0731 100644
--- a/src/python/swig/core_device.i
+++ b/src/python/swig/core_device.i
@@ -58,6 +58,10 @@ class Platform {
   static const std::string DeviceQuery(int id, bool verbose = false);
   static const std::vector<std::shared_ptr<Device> >
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
+  static const std::vector<std::shared_ptr<Device>>
+  CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+  static std::shared_ptr<Device> GetDefaultDevice();
 };
+
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/python/swig/model_layer.i
----------------------------------------------------------------------
diff --git a/src/python/swig/model_layer.i b/src/python/swig/model_layer.i
index 6cbfe8f..a6cdad1 100644
--- a/src/python/swig/model_layer.i
+++ b/src/python/swig/model_layer.i
@@ -81,7 +81,6 @@ const std::vector<std::string> GetRegisteredLayers();
 class RNN : public Layer {
 };
 
-#if CUDNN_VERSION_MINOR >= 5 && CUDNN_VERSION_PATCH >= 5
 class CudnnRNN : public RNN {
  public:
  // note: Must use std::vector instead of vector.
@@ -93,7 +92,5 @@ class CudnnRNN : public RNN {
     const std::vector<size_t> GetOutputSampleShape() const override;
 };
 
-#endif  // CUDNN_VERSION_MINOR >= 5 && CUDNN_VERSION_PATCH >= 5
-
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_activation.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_activation.cc b/test/singa/test_activation.cc
index 001c49c..bb8ad84 100644
--- a/test/singa/test_activation.cc
+++ b/test/singa/test_activation.cc
@@ -27,15 +27,15 @@ using singa::Activation;
 using singa::Shape;
 TEST(Activation, Setup) {
   Activation acti;
-  EXPECT_EQ("Activation", acti.layer_type());
+  // EXPECT_EQ("Activation", acti.layer_type());
 
   singa::LayerConf conf;
-  conf.set_type("RELU");
+  conf.set_type("singa_relu");
   singa::ReLUConf* reluconf = conf.mutable_relu_conf();
   reluconf->set_negative_slope(0.5);
 
   acti.Setup(Shape{3}, conf);
-  EXPECT_EQ("RELU", acti.Mode());
+  EXPECT_EQ("relu", acti.Mode());
   EXPECT_EQ(0.5f, acti.Negative_slope());
 }
 
@@ -46,13 +46,13 @@ TEST(Activation, Forward) {
   in.CopyDataFromHostPtr<float>(x, n);
 
   float neg_slope = 0.5f;
-  std::string types[] = {"SIGMOID","TANH","RELU"};
+  std::string types[] = {"singa_sigmoid", "singa_tanh", "singa_relu"};
   for (int j = 0; j < 3; j++) {
     Activation acti;
     singa::LayerConf conf;
     std::string layertype = types[j];
     conf.set_type(layertype);
-    if (layertype == "RELU") {
+    if (layertype == "relu") {
       singa::ReLUConf* reluconf = conf.mutable_relu_conf();
       reluconf->set_negative_slope(neg_slope);
     }
@@ -64,15 +64,15 @@ TEST(Activation, Forward) {
     EXPECT_EQ(n, out.Size());
 
     float* y = new float[n];
-    if (acti.Mode() == "SIGMOID") {
+    if (acti.Mode() == "sigmoid") {
       for (size_t i = 0; i < n; i++)
         y[i] = 1.f / (1.f + exp(-x[i]));
     }
-    else if (acti.Mode() == "TANH") {
+    else if (acti.Mode() == "tanh") {
       for (size_t i = 0; i < n; i++)
         y[i] = tanh(x[i]);
     }
-    else if (acti.Mode() == "RELU") {
+    else if (acti.Mode() == "relu") {
       for (size_t i = 0; i < n; i++)
         y[i] = (x[i] >= 0.f) ? x[i] : 0.f;
     }
@@ -92,13 +92,13 @@ TEST(Activation, Backward) {
   in.CopyDataFromHostPtr<float>(x, n);
 
   float neg_slope = 0.5f;
-  std::string types[] = {"SIGMOID","TANH","RELU"};
+  std::string types[] = {"singa_sigmoid", "singa_tanh", "singa_relu"};
   for (int j = 0; j < 3; j++) {
     Activation acti;
     singa::LayerConf conf;
     std::string layertype = types[j];
     conf.set_type(layertype);
-    if (layertype == "RELU") {
+    if (layertype == "relu") {
       singa::ReLUConf* reluconf = conf.mutable_relu_conf();
       reluconf->set_negative_slope(neg_slope);
     }
@@ -114,15 +114,15 @@ TEST(Activation, Backward) {
     const float* xptr = in_diff.first.data<float>();
 
     float* dx = new float[n];
-    if (acti.Mode() == "SIGMOID") {
+    if (acti.Mode() == "sigmoid") {
       for (size_t i = 0; i < n; i++)
         dx[i] = grad[i] * yptr[i] * (1. - yptr[i]);
     }
-    else if (acti.Mode() == "TANH") {
+    else if (acti.Mode() == "tanh") {
       for (size_t i = 0; i < n; i++)
         dx[i] = grad[i] * (1 - yptr[i] * yptr[i]);
     }
-    else if (acti.Mode() == "RELU") {
+    else if (acti.Mode() == "relu") {
       for (size_t i = 0; i < n; i++)
         dx[i] = grad[i] * (x[i] > 0.f) + acti.Negative_slope() * (x[i] <= 0.f);
     }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_batchnorm.cc b/test/singa/test_batchnorm.cc
index c72dc0f..a61f6f3 100644
--- a/test/singa/test_batchnorm.cc
+++ b/test/singa/test_batchnorm.cc
@@ -27,7 +27,7 @@ using namespace singa;
 
 TEST(BatchNorm, Setup) {
   BatchNorm batchnorm;
-  EXPECT_EQ("BatchNorm", batchnorm.layer_type());
+  // EXPECT_EQ("BatchNorm", batchnorm.layer_type());
 
   singa::LayerConf conf;
   singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
@@ -68,10 +68,10 @@ TEST(BatchNorm, Forward) {
   EXPECT_EQ(1u, shape[1]);
   EXPECT_EQ(2u, shape[2]);
   EXPECT_EQ(1u, shape[3]);
-  EXPECT_NEAR(1.0f, outptr[0], 1e-6f);
-  EXPECT_NEAR(1.0f, outptr[1], 1e-6f);
-  EXPECT_NEAR(3.0f, outptr[2], 1e-6f);
-  EXPECT_NEAR(3.0f, outptr[3], 1e-6f);
+  EXPECT_NEAR(1.0f, outptr[0], 1e-4f);
+  EXPECT_NEAR(1.0f, outptr[1], 1e-4f);
+  EXPECT_NEAR(3.0f, outptr[2], 1e-4f);
+  EXPECT_NEAR(3.0f, outptr[3], 1e-4f);
 }
 
 TEST(BatchNorm, Backward) {
@@ -107,10 +107,10 @@ TEST(BatchNorm, Backward) {
   EXPECT_EQ(2u, shape[2]);
   EXPECT_EQ(1u, shape[3]);
   const float *dxptr = ret.first.data<float>();
-  EXPECT_NEAR(.0f, dxptr[0], 1e-6f);
-  EXPECT_NEAR(.0f, dxptr[1], 1e-6f);
-  EXPECT_NEAR(.0f, dxptr[2], 1e-6f);
-  EXPECT_NEAR(.0f, dxptr[3], 1e-6f);
+  EXPECT_NEAR(.0f, dxptr[0], 1e-4f);
+  EXPECT_NEAR(.0f, dxptr[1], 1e-4f);
+  EXPECT_NEAR(.0f, dxptr[2], 1e-4f);
+  EXPECT_NEAR(.0f, dxptr[3], 1e-4f);
 
   Tensor dbnScale = ret.second.at(0);
   const float *dbnScaleptr = dbnScale.data<float>();
@@ -118,8 +118,8 @@ TEST(BatchNorm, Backward) {
   EXPECT_EQ(1u, dbnScaleShape.size());
   EXPECT_EQ(2u, dbnScaleShape[0]);
 
-  EXPECT_NEAR(-2.0f, dbnScaleptr[0], 1e-6f);
-  EXPECT_NEAR(-2.0f, dbnScaleptr[1], 1e-6f);
+  EXPECT_NEAR(-2.0f, dbnScaleptr[0], 1e-4f);
+  EXPECT_NEAR(-2.0f, dbnScaleptr[1], 1e-4f);
 
   Tensor dbnBias = ret.second.at(1);
   const float *dbnBiasptr = dbnBias.data<float>();
@@ -127,6 +127,6 @@ TEST(BatchNorm, Backward) {
   EXPECT_EQ(1u, dbnBiasShape.size());
   EXPECT_EQ(2u, dbnBiasShape[0]);
 
-  EXPECT_NEAR(6.0f, dbnBiasptr[0], 1e-6f);
-  EXPECT_NEAR(4.0f, dbnBiasptr[1], 1e-6f);
+  EXPECT_NEAR(6.0f, dbnBiasptr[0], 1e-4f);
+  EXPECT_NEAR(4.0f, dbnBiasptr[1], 1e-4f);
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_convolution.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_convolution.cc b/test/singa/test_convolution.cc
index c3ddcee..4cfb38d 100644
--- a/test/singa/test_convolution.cc
+++ b/test/singa/test_convolution.cc
@@ -29,7 +29,7 @@ using singa::Convolution;
 using singa::Shape;
 TEST(Convolution, Setup) {
   Convolution conv;
-  EXPECT_EQ("Convolution", conv.layer_type());
+  // EXPECT_EQ("Convolution", conv.layer_type());
 
   singa::LayerConf conf;
   singa::ConvolutionConf *convconf = conf.mutable_convolution_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_activation.cc b/test/singa/test_cudnn_activation.cc
index 9279d6c..6a989d1 100644
--- a/test/singa/test_cudnn_activation.cc
+++ b/test/singa/test_cudnn_activation.cc
@@ -29,12 +29,12 @@
 
 using singa::CudnnActivation;
 using singa::Shape;
-TEST(TCudnnActivation, Setup) {
+TEST(CudnnActivation, Setup) {
   CudnnActivation acti;
-  EXPECT_EQ("CudnnActivation", acti.layer_type());
+  // EXPECT_EQ("CudnnActivation", acti.layer_type());
 
   singa::LayerConf conf;
-  conf.set_type("RELU");
+  conf.set_type("cudnn_relu");
   singa::ReLUConf* reluconf = conf.mutable_relu_conf();
   reluconf->set_negative_slope(0.5f);
 
@@ -43,7 +43,7 @@ TEST(TCudnnActivation, Setup) {
   EXPECT_EQ(0.5f, acti.Negative_slope());
 }
 
-TEST(TCudnnActivation, Forward) {
+TEST(CudnnActivation, Forward) {
   const float x[] = {1.0f, 2.0f, 3.0f, -2.0f, -3.0f, -4.0};
   size_t n = sizeof(x) / sizeof(float);
   auto cuda = std::make_shared<singa::CudaGPU>();
@@ -51,13 +51,13 @@ TEST(TCudnnActivation, Forward) {
   in.CopyDataFromHostPtr<float>(x, n);
 
   float neg_slope = 0.5f;
-  std::string types[] = {"SIGMOID", "TANH", "RELU"};
+  std::string types[] = {"cudnn_sigmoid", "cudnn_tanh", "cudnn_relu"};
   for (int j = 0; j < 3; j++) {
     CudnnActivation acti;
     singa::LayerConf conf;
     std::string layertype = types[j];
     conf.set_type(layertype);
-    if (layertype == "RELU") {
+    if (layertype == "relu") {
       singa::ReLUConf* reluconf = conf.mutable_relu_conf();
       reluconf->set_negative_slope(neg_slope);
     }
@@ -68,11 +68,11 @@ TEST(TCudnnActivation, Forward) {
     out.ToHost();
     const float* yptr = out.data<float>();
     float* y = new float[n];
-    if (acti.Mode() == "SIGMOID") {
+    if (acti.Mode() == "sigmoid") {
       for (size_t i = 0; i < n; i++) y[i] = 1.f / (1.f + exp(-x[i]));
-    } else if (acti.Mode() == "TANH") {
+    } else if (acti.Mode() == "tanh") {
       for (size_t i = 0; i < n; i++) y[i] = tanh(x[i]);
-    } else if (acti.Mode() == "RELU") {
+    } else if (acti.Mode() == "relu") {
       for (size_t i = 0; i < n; i++) y[i] = (x[i] >= 0.f) ? x[i] : 0.f;
     } else
       LOG(FATAL) << "Unkown activation: " << acti.Mode();
@@ -83,14 +83,14 @@ TEST(TCudnnActivation, Forward) {
   }
 }
 
-TEST(TCudnnActivation, Backward) {
+TEST(CudnnActivation, Backward) {
   const float x[] = {2.0f, 3.0f, 3.0f, 7.f, 0.0f, 5.0, 1.5, 2.5, -2.5, 1.5};
   size_t n = sizeof(x) / sizeof(float);
   auto cuda = std::make_shared<singa::CudaGPU>();
   singa::Tensor in(singa::Shape{n}, cuda);
   in.CopyDataFromHostPtr<float>(x, n);
   float neg_slope = 0.5f;
-  std::string types[] = {"SIGMOID", "TANH", "RELU"};
+  std::string types[] = {"cudnn_sigmoid", "cudnn_tanh", "cudnn_relu"};
   for (int j = 0; j < 3; j++) {
     CudnnActivation acti;
     singa::LayerConf conf;
@@ -115,11 +115,11 @@ TEST(TCudnnActivation, Backward) {
     in_diff.ToHost();
     const float* xptr = in_diff.data<float>();
     float* dx = new float[n];
-    if (acti.Mode() == "SIGMOID") {
+    if (acti.Mode() == "sigmoid") {
       for (size_t i = 0; i < n; i++) dx[i] = grad[i] * yptr[i] * (1. - yptr[i]);
-    } else if (acti.Mode() == "TANH") {
+    } else if (acti.Mode() == "tanh") {
       for (size_t i = 0; i < n; i++) dx[i] = grad[i] * (1. - yptr[i] * yptr[i]);
-    } else if (acti.Mode() == "RELU") {
+    } else if (acti.Mode() == "relu") {
       for (size_t i = 0; i < n; i++)
         dx[i] =
             grad[i] * (x[i] > 0.f);  //+ acti.Negative_slope() * (x[i] <= 0.f);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_batchnorm.cc b/test/singa/test_cudnn_batchnorm.cc
index 4f6a38b..b2746dc 100644
--- a/test/singa/test_cudnn_batchnorm.cc
+++ b/test/singa/test_cudnn_batchnorm.cc
@@ -28,7 +28,7 @@ using singa::CudnnBatchNorm;
 using singa::Shape;
 TEST(CudnnBatchNorm, Setup) {
   CudnnBatchNorm batchnorm;
-  EXPECT_EQ("CudnnBatchNorm", batchnorm.layer_type());
+  // EXPECT_EQ("CudnnBatchNorm", batchnorm.layer_type());
 
   singa::LayerConf conf;
   singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_convolution.cc b/test/singa/test_cudnn_convolution.cc
index 66c62f6..8dbee63 100644
--- a/test/singa/test_cudnn_convolution.cc
+++ b/test/singa/test_cudnn_convolution.cc
@@ -27,7 +27,7 @@ using singa::CudnnConvolution;
 using singa::Shape;
 TEST(CudnnConvolution, Setup) {
   CudnnConvolution conv;
-  EXPECT_EQ("CudnnConvolution", conv.layer_type());
+  // EXPECT_EQ("CudnnConvolution", conv.layer_type());
 
   singa::LayerConf conf;
   singa::ConvolutionConf *convconf = conf.mutable_convolution_conf();
@@ -199,7 +199,7 @@ TEST(CudnnConvolution, Backward) {
 // Tests for prefer=autotune
 TEST(CudnnConvolution_AT, Setup) {
   CudnnConvolution conv;
-  EXPECT_EQ("CudnnConvolution", conv.layer_type());
+  // EXPECT_EQ("CudnnConvolution", conv.layer_type());
 
   singa::LayerConf conf;
   singa::ConvolutionConf *convconf = conf.mutable_convolution_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_dropout.cc b/test/singa/test_cudnn_dropout.cc
index 7f28aca..4a89235 100644
--- a/test/singa/test_cudnn_dropout.cc
+++ b/test/singa/test_cudnn_dropout.cc
@@ -36,7 +36,7 @@ using singa::CudnnDropout;
 using singa::Shape;
 TEST(CudnnDropout, Setup) {
   CudnnDropout drop;
-  EXPECT_EQ("CudnnDropout", drop.layer_type());
+  // EXPECT_EQ("CudnnDropout", drop.layer_type());
 
   singa::LayerConf conf;
   singa::DropoutConf* dropconf = conf.mutable_dropout_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_lrn.cc b/test/singa/test_cudnn_lrn.cc
index 23fbe2e..04ca5f2 100644
--- a/test/singa/test_cudnn_lrn.cc
+++ b/test/singa/test_cudnn_lrn.cc
@@ -30,7 +30,7 @@ using singa::CudnnLRN;
 using singa::Shape;
 TEST(CudnnLRN, Setup) {
   CudnnLRN lrn;
-  EXPECT_EQ("CudnnLRN", lrn.layer_type());
+  // EXPECT_EQ("CudnnLRN", lrn.layer_type());
 
   singa::LayerConf conf;
   singa::LRNConf *lrn_conf = conf.mutable_lrn_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_pooling.cc b/test/singa/test_cudnn_pooling.cc
index 5c01889..0e3314e 100644
--- a/test/singa/test_cudnn_pooling.cc
+++ b/test/singa/test_cudnn_pooling.cc
@@ -27,7 +27,7 @@ using singa::CudnnPooling;
 using singa::Shape;
 TEST(CudnnPooling, Setup) {
   CudnnPooling pool;
-  EXPECT_EQ("CudnnPooling", pool.layer_type());
+  //  EXPECT_EQ("CudnnPooling", pool.layer_type());
 
   singa::LayerConf conf;
   singa::PoolingConf *poolconf = conf.mutable_pooling_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_rnn.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_rnn.cc b/test/singa/test_cudnn_rnn.cc
index effb3b1..e293cf7 100644
--- a/test/singa/test_cudnn_rnn.cc
+++ b/test/singa/test_cudnn_rnn.cc
@@ -45,7 +45,7 @@ class TestCudnnRNN : public ::testing::Test {
 
 TEST_F(TestCudnnRNN, Setup) {
   CudnnRNN rnn;
-  EXPECT_EQ("CudnnRNN", rnn.layer_type());
+  // EXPECT_EQ("CudnnRNN", rnn.layer_type());
   rnn.Setup(Shape{2}, conf);
   auto weight = rnn.param_values().at(0);
   EXPECT_EQ(weight.Size(), hidden_size * (2 + hidden_size + 2));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_softmax.cc b/test/singa/test_cudnn_softmax.cc
index 2b88843..6e0d5ab 100644
--- a/test/singa/test_cudnn_softmax.cc
+++ b/test/singa/test_cudnn_softmax.cc
@@ -31,7 +31,7 @@ using singa::CudnnSoftmax;
 using singa::Shape;
 TEST(CudnnSoftmax, Setup) {
   CudnnSoftmax sft;
-  EXPECT_EQ("CudnnSoftmax", sft.layer_type());
+  // EXPECT_EQ("CudnnSoftmax", sft.layer_type());
 
   singa::LayerConf conf;
   singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index f4ecdfc..17e161a 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -26,7 +26,7 @@ using singa::Dense;
 using singa::Shape;
 TEST(Dense, Setup) {
   Dense dense;
-  EXPECT_EQ("Dense", dense.layer_type());
+  // EXPECT_EQ("Dense", dense.layer_type());
 
   singa::LayerConf conf;
   singa::DenseConf *denseconf = conf.mutable_dense_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_dropout.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dropout.cc b/test/singa/test_dropout.cc
index 3dd988a..b0c34a3 100644
--- a/test/singa/test_dropout.cc
+++ b/test/singa/test_dropout.cc
@@ -26,7 +26,7 @@ using singa::Dropout;
 using singa::Shape;
 TEST(Dropout, Setup) {
   Dropout drop;
-  EXPECT_EQ("Dropout", drop.layer_type());
+  // EXPECT_EQ("Dropout", drop.layer_type());
 
   singa::LayerConf conf;
   singa::DropoutConf* dropconf = conf.mutable_dropout_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_flatten.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_flatten.cc b/test/singa/test_flatten.cc
index 25e00c4..65748f7 100644
--- a/test/singa/test_flatten.cc
+++ b/test/singa/test_flatten.cc
@@ -26,7 +26,7 @@ using singa::Flatten;
 using singa::Shape;
 TEST(Flatten, Setup) {
   Flatten flt;
-  EXPECT_EQ("Flatten", flt.layer_type());
+  // EXPECT_EQ("Flatten", flt.layer_type());
 
   singa::LayerConf conf;
   singa::FlattenConf *flattenconf = conf.mutable_flatten_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_layer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_layer.cc b/test/singa/test_layer.cc
index 4071762..aa01746 100644
--- a/test/singa/test_layer.cc
+++ b/test/singa/test_layer.cc
@@ -4,26 +4,25 @@
 
 TEST(Layer, CreateLayer) {
   std::vector<std::string> types{
-      "Convolution", "Dense", "Dropout", "Activation", "BatchNorm",
-      "Flatten",     "LRN",   "Pooling", "PReLU",      "Softmax"};
+      "convolution", "dense", "dropout", "relu", "batchnorm",
+      "flatten",     "lrn",   "pooling", "prelu",      "softmax"};
   for (auto type : types) {
-    auto layer = singa::CreateLayer(type);
-    EXPECT_EQ(layer->layer_type(), type);
+    auto layer = singa::CreateLayer("singa_" + type);
+    // EXPECT_EQ(layer->layer_type(), type);
   }
 }
 
 #ifdef USE_CUDNN
 TEST(Layer, CreateCudnnLayer) {
   std::vector<std::string> types{
-      "CudnnConvolution", "CudnnActivation",
-      "CudnnBatchNorm",   "Flatten",      "CudnnLRN",
-      "CudnnPooling",     "PReLU",        "CudnnSoftmax"};
+      "convolution", "dropout", "relu", "batchnorm",
+      "lrn",   "pooling", "softmax"};
 #if CUDNN_VERSION_MAJOR >= 5
-  types.push_back("CudnnDropout");
+  types.push_back("dropout");
 #endif
   for (auto type : types) {
-    auto layer = singa::CreateLayer(type);
-    EXPECT_EQ(layer->layer_type(), type);
+    auto layer = singa::CreateLayer("cudnn_" + type);
+    // EXPECT_EQ(layer->layer_type(), type);
   }
 }
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_lrn.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_lrn.cc b/test/singa/test_lrn.cc
index 5de4535..454e1a9 100644
--- a/test/singa/test_lrn.cc
+++ b/test/singa/test_lrn.cc
@@ -26,7 +26,7 @@ using namespace singa;
 
 TEST(LRN, Setup) {
   LRN lrn;
-  EXPECT_EQ("LRN", lrn.layer_type());
+  // EXPECT_EQ("LRN", lrn.layer_type());
 
   LayerConf conf;
   LRNConf *lrn_conf = conf.mutable_lrn_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_pooling.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_pooling.cc b/test/singa/test_pooling.cc
index 3089a90..7ba56d1 100644
--- a/test/singa/test_pooling.cc
+++ b/test/singa/test_pooling.cc
@@ -26,7 +26,7 @@ using singa::Pooling;
 using singa::Shape;
 TEST(Pooling, Setup) {
   Pooling pool;
-  EXPECT_EQ("Pooling", pool.layer_type());
+  //  EXPECT_EQ("Pooling", pool.layer_type());
 
   singa::LayerConf conf;
   singa::PoolingConf *poolconf = conf.mutable_pooling_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_prelu.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_prelu.cc b/test/singa/test_prelu.cc
index dbb7cde..77b4b74 100644
--- a/test/singa/test_prelu.cc
+++ b/test/singa/test_prelu.cc
@@ -27,7 +27,7 @@ using singa::PReLU;
 using singa::Shape;
 TEST(PReLU, Setup) {
   PReLU prelu;
-  EXPECT_EQ("PReLU", prelu.layer_type());
+  // EXPECT_EQ("PReLU", prelu.layer_type());
 
   singa::LayerConf conf;
   singa::PReLUConf *preluconf = conf.mutable_prelu_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/test/singa/test_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_softmax.cc b/test/singa/test_softmax.cc
index 00b8378..8064b80 100644
--- a/test/singa/test_softmax.cc
+++ b/test/singa/test_softmax.cc
@@ -27,7 +27,7 @@ using singa::Softmax;
 using singa::Shape;
 TEST(Softmax, Setup) {
   Softmax sft;
-  EXPECT_EQ("Softmax", sft.layer_type());
+  // EXPECT_EQ("Softmax", sft.layer_type());
 
   singa::LayerConf conf;
   sft.Setup(Shape{3}, conf);

[09/51] [abbrv] incubator-singa git commit: SINGA-235 - Unify the engines for cudnn and singa layers

Posted by wa...@apache.org.

SINGA-235 - Unify the engines for cudnn and singa layers

Fixed a bug in alexnet.cc caused by forgeting to udpate the layer
construction code.
Updated some code to avoid the warnings from the compilation.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a91bf2a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a91bf2a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a91bf2a7

Branch: refs/heads/master
Commit: a91bf2a7ef9b8ba86973e579f4e170a0aa816444
Parents: 94ffe55
Author: Wei Wang <wa...@gmail.com>
Authored: Fri Aug 12 17:17:03 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Fri Aug 12 17:19:21 2016 +0800

----------------------------------------------------------------------
 cmake/Thirdparty/FindCUDNN.cmake     |  2 +-
 examples/imagenet/alexnet.cc         | 49 +++++++++++++++----------------
 test/singa/test_image_transformer.cc | 20 ++++++-------
 3 files changed, 34 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a91bf2a7/cmake/Thirdparty/FindCUDNN.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindCUDNN.cmake b/cmake/Thirdparty/FindCUDNN.cmake
index fbc103c..32b927b 100644
--- a/cmake/Thirdparty/FindCUDNN.cmake
+++ b/cmake/Thirdparty/FindCUDNN.cmake
@@ -27,7 +27,7 @@ IF(CUDNN_FOUND)
     ELSE()
       MATH(EXPR CUDNN_VERSION_SWIG "${CUDNN_VERSION_MAJOR} * 1000 + ${CUDNN_VERSION_MINOR} * 100 + ${CUDNN_VERSION_PATCH}")
     ENDIF()
-    MESSAGE(STATUS "Found Cudnn_v${CUDNN_VERSION} at ${CUDNN_INCLUDE_DIR} ${CUDNN_LIBRARIES}")
+    MESSAGE(STATUS "Found Cudnn_v${CUDNN_VERSION_SWIG} at ${CUDNN_INCLUDE_DIR} ${CUDNN_LIBRARIES}")
     MARK_AS_ADVANCED(CUDNN_INCLUDE_DIR CUDNN_LIBRARIES)
 
 ENDIF()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a91bf2a7/examples/imagenet/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet.cc b/examples/imagenet/alexnet.cc
index 3fb5d04..26b2d96 100644
--- a/examples/imagenet/alexnet.cc
+++ b/examples/imagenet/alexnet.cc
@@ -137,32 +137,29 @@ FeedForwardNet CreateNet() {
   FeedForwardNet net;
   Shape s{3, 227, 227};
 
-  net.Add(new CudnnConvolution(), GenConvConf("conv1", 96, 11, 4, 0, 0.01), &s);
-  net.Add(new CudnnActivation(), GenReLUConf("relu1"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 3, 2, 0));
-  net.Add(new CudnnLRN(), GenLRNConf("lrn1"));
-  net.Add(new CudnnConvolution(),
-          GenConvConf("conv2", 256, 5, 1, 2, 0.01, 1.0));
-  net.Add(new CudnnActivation(), GenReLUConf("relu2"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 3, 2, 0));
-  net.Add(new CudnnLRN(), GenLRNConf("lrn2"));
-  net.Add(new CudnnConvolution(), GenConvConf("conv3", 384, 3, 1, 1, 0.01));
-  net.Add(new CudnnActivation(), GenReLUConf("relu3"));
-  net.Add(new CudnnConvolution(),
-          GenConvConf("conv4", 384, 3, 1, 1, 0.01, 1.0));
-  net.Add(new CudnnActivation(), GenReLUConf("relu4"));
-  net.Add(new CudnnConvolution(),
-          GenConvConf("conv5", 256, 3, 1, 1, 0.01, 1.0));
-  net.Add(new CudnnActivation(), GenReLUConf("relu5"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 3, 2, 0));
-  net.Add(new Flatten(), GenFlattenConf("flat"));
-  net.Add(new Dense(), GenDenseConf("ip6", 4096, 0.005, 1, 1.0));
-  net.Add(new CudnnActivation(), GenReLUConf("relu6"));
-  net.Add(new Dropout(), GenDropoutConf("drop6", 0.5));
-  net.Add(new Dense(), GenDenseConf("ip7", 4096, 0.005, 1, 1.0));
-  net.Add(new CudnnActivation(), GenReLUConf("relu7"));
-  net.Add(new Dropout(), GenDropoutConf("drop7", 0.5));
-  net.Add(new Dense(), GenDenseConf("ip8", 1000, 0.01, 1));
+  net.Add(GenConvConf("conv1", 96, 11, 4, 0, 0.01), &s);
+  net.Add(GenReLUConf("relu1"));
+  net.Add(GenPoolingConf("pool1", true, 3, 2, 0));
+  net.Add(GenLRNConf("lrn1"));
+  net.Add(GenConvConf("conv2", 256, 5, 1, 2, 0.01, 1.0));
+  net.Add(GenReLUConf("relu2"));
+  net.Add(GenPoolingConf("pool2", true, 3, 2, 0));
+  net.Add(GenLRNConf("lrn2"));
+  net.Add(GenConvConf("conv3", 384, 3, 1, 1, 0.01));
+  net.Add(GenReLUConf("relu3"));
+  net.Add(GenConvConf("conv4", 384, 3, 1, 1, 0.01, 1.0));
+  net.Add(GenReLUConf("relu4"));
+  net.Add(GenConvConf("conv5", 256, 3, 1, 1, 0.01, 1.0));
+  net.Add(GenReLUConf("relu5"));
+  net.Add(GenPoolingConf("pool5", true, 3, 2, 0));
+  net.Add(GenFlattenConf("flat"));
+  net.Add(GenDenseConf("ip6", 4096, 0.005, 1, 1.0));
+  net.Add(GenReLUConf("relu6"));
+  net.Add(GenDropoutConf("drop6", 0.5));
+  net.Add(GenDenseConf("ip7", 4096, 0.005, 1, 1.0));
+  net.Add(GenReLUConf("relu7"));
+  net.Add(GenDropoutConf("drop7", 0.5));
+  net.Add(GenDenseConf("ip8", 1000, 0.01, 1));
 
   return net;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a91bf2a7/test/singa/test_image_transformer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_image_transformer.cc b/test/singa/test_image_transformer.cc
index 92eb1a6..4540aa8 100644
--- a/test/singa/test_image_transformer.cc
+++ b/test/singa/test_image_transformer.cc
@@ -61,7 +61,7 @@ TEST(ImageTransformer, Apply3D) {
   srand(time(NULL));
   for (size_t i = 0; i < n; i++) x[i] = (float)(rand() % 256);
   in.CopyDataFromHostPtr<float>(x, n);
-  size_t resize_height = 4, resize_width = 6;
+  int resize_height = 4, resize_width = 6;
 
   singa::ImageTransformer img_transformer;
   singa::TransformerConf conf;
@@ -90,8 +90,8 @@ TEST(ImageTransformer, Apply3D) {
   EXPECT_EQ(resize_width, resized.size().width);
   size_t new_size = resize_height * resize_width * channel;
   float* xt = new float[new_size];
-  for (size_t i = 0; i < resize_height; i++)
-    for (size_t j = 0; j < resize_width; j++)
+  for (int i = 0; i < resize_height; i++)
+    for (int j = 0; j < resize_width; j++)
       for (size_t k = 0; k < channel; k++)
         xt[i * resize_width * channel + j * channel + k] = resized.at<cv::Vec3f>(i, j)[k];
   for (size_t c = 0; c < 3; c++)
@@ -128,7 +128,7 @@ TEST(ImageTransformer, Apply2D) {
   srand(time(NULL));
   for (size_t i = 0; i < n; i++) x[i] = (float)(rand() % 256);
   in.CopyDataFromHostPtr<float>(x, n);
-  size_t resize_height = 4, resize_width = 6;
+  int resize_height = 4, resize_width = 6;
 
   singa::ImageTransformer img_transformer;
   singa::TransformerConf conf;
@@ -156,8 +156,8 @@ TEST(ImageTransformer, Apply2D) {
   EXPECT_EQ(resize_width, resized.size().width);
   size_t new_size = resize_height * resize_width;
   float* xt = new float[new_size];
-  for (size_t i = 0; i < resize_height; i++)
-    for (size_t j = 0; j < resize_width; j++)
+  for (int i = 0; i < resize_height; i++)
+    for (int j = 0; j < resize_width; j++)
         xt[i * resize_width + j] = resized.at<cv::Vec<float, 1>>(i, j)[0];
 
   for (size_t h = 0; h < 2; h++)
@@ -187,7 +187,7 @@ TEST(ImageTransformer, Resize) {
   srand(time(NULL));
   for (size_t i = 0; i < n; i++) x[i] = (float)(rand() % 256);
   in.CopyDataFromHostPtr<float>(x, n);
-  size_t resize_height = 4, resize_width = 5;
+  int resize_height = 4, resize_width = 5;
   singa::Tensor out = singa::resize(in, resize_height, resize_width, "HWC");
   const float* y = out.data<float>();
 
@@ -203,8 +203,8 @@ TEST(ImageTransformer, Resize) {
   EXPECT_EQ(resize_width, resized.size().width);
   size_t new_size = resize_height * resize_width * channel;
   float* xt = new float[new_size];
-  for (size_t i = 0; i < resize_height; i++)
-    for (size_t j = 0; j < resize_width; j++)
+  for (int i = 0; i < resize_height; i++)
+    for (int j = 0; j < resize_width; j++)
       for (size_t k = 0; k < channel; k++)
         xt[i * resize_width * channel + j * channel + k] = resized.at<cv::Vec3f>(i, j)[k];
 
@@ -232,7 +232,7 @@ TEST(ImageTransformer, Crop) {
     for (size_t w = 0; w < crop_width; w++)
       for (size_t c = 0; c < channel; c++) {
         size_t out_idx = c * crop_height * crop_width + h * crop_width + w;
-        size_t in_idx = c * height * width + (h + crop_h_offset) 
+        size_t in_idx = c * height * width + (h + crop_h_offset)
                  * width + w + crop_w_offset;
         EXPECT_EQ(x[in_idx], y[out_idx]);
       }

[04/51] [abbrv] incubator-singa git commit: SINGA-174 Add Batch Normalization layer and Local Response Nomalization layer.

Posted by wa...@apache.org.

SINGA-174 Add Batch Normalization layer and Local Response Nomalization
    layer.

Revise cifar10 example to support batchnormalized vgg model trainning
on CPU.
Now parameters of Batch Normalization layer are 1D tensor
both in GPU and CPU version.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/055ff17b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/055ff17b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/055ff17b

Branch: refs/heads/master
Commit: 055ff17b2a2507ffd411eaf6f281d476152a87dc
Parents: 05720c2
Author: Wang Ji <ij...@gmail.com>
Authored: Thu Aug 11 15:28:38 2016 +0800
Committer: Wang Ji <ij...@gmail.com>
Committed: Thu Aug 11 15:28:38 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/train.py          |  2 +-
 examples/cifar10/vgg.py            | 16 ++++++++--------
 src/model/layer/cudnn_batchnorm.cc | 16 ++++++++--------
 test/singa/test_cudnn_batchnorm.cc | 22 ++++++++--------------
 4 files changed, 25 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 3285651..9d363cf 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -107,7 +107,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
-    opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
+    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         opt.register(p, specs)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 97e690c..cd0f613 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -80,14 +80,14 @@ def create_net(use_cpu=False):
     print 'Start intialization............'
     for (p, name) in zip(net.param_values(), net.param_names()):
         print name, p.shape
-        if len(p.shape) > 1:
-            if 'mean' in name or 'beta' in name:
-                p.set_value(0.0)
-            elif 'var' in name:
-                p.set_value(1.0)
-            elif 'gamma' in name:
-                initializer.uniform(p, 0, 1)
-            elif 'conv' in name:
+        if 'mean' in name or 'beta' in name:
+            p.set_value(0.0)
+        elif 'var' in name:
+            p.set_value(1.0)
+        elif 'gamma' in name:
+            initializer.uniform(p, 0, 1)
+        elif len(p.shape) > 1:
+            if 'conv' in name:
                 initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
             else:
                 initializer.gaussian(p, 0, 0.02)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
index 01682b7..f29679c 100644
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -39,14 +39,14 @@ void CudnnBatchNorm::ToDevice(std::shared_ptr<Device> device) {
 
 void CudnnBatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   BatchNorm::Setup(in_sample, conf);
-  bnScale_.Reshape(Shape{1,channels_,1,1});
-  bnBias_.ResetLike(bnScale_);
-  dbnScale_.ResetLike(bnScale_);
-  dbnBias_.ResetLike(bnScale_);
-  runningMean_.ResetLike(bnScale_);
-  runningVariance_.ResetLike(bnScale_);
-  resultSaveMean_.ResetLike(bnScale_);
-  resultSaveVariance_.ResetLike(bnScale_);
+  bnScale_.Reshape(Shape{channels_});
+  bnBias_.Reshape(Shape{channels_});
+  dbnScale_.Reshape(Shape{channels_});
+  dbnBias_.Reshape(Shape{channels_});
+  runningMean_.Reshape(Shape{channels_});
+  runningVariance_.Reshape(Shape{channels_});
+  resultSaveMean_.Reshape(Shape{channels_});
+  resultSaveVariance_.Reshape(Shape{channels_});
 }
 
 void CudnnBatchNorm::InitCudnn(const Shape& shape, DataType dtype) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/test/singa/test_cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_batchnorm.cc b/test/singa/test_cudnn_batchnorm.cc
index b2746dc..b024c19 100644
--- a/test/singa/test_cudnn_batchnorm.cc
+++ b/test/singa/test_cudnn_batchnorm.cc
@@ -152,19 +152,19 @@ TEST(CudnnBatchNorm, Backward) {
   singa::Tensor dy_tensor(singa::Shape{1,2,4,4}, cuda);
   dy_tensor.CopyDataFromHostPtr(dy, 1*2*4*4);
   const float alpha_[] = {1, 1};
-  singa::Tensor alpha(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor alpha(singa::Shape{2}, cuda);
   alpha.CopyDataFromHostPtr(alpha_, 1*2*1*1);
 
   const float beta_[] = {0, 0};
-  singa::Tensor beta(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor beta(singa::Shape{2}, cuda);
   beta.CopyDataFromHostPtr(beta_, 1*2*1*1);
 
   const float mean_[] = {0.0123405, -0.0622333};
-  singa::Tensor mean(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor mean(singa::Shape{2}, cuda);
   mean.CopyDataFromHostPtr(mean_, 1*2*1*1);
 
   const float var_[] = {15.9948, 8.68198};
-  singa::Tensor var(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor var(singa::Shape{2}, cuda);
   var.CopyDataFromHostPtr(var_, 1*2*1*1);
 
   batchnorm.ToDevice(cuda);
@@ -220,11 +220,8 @@ TEST(CudnnBatchNorm, Backward) {
   dbnScale.ToHost();
   const float *dbnScaleptr = dbnScale.data<float>();
   const auto & dbnScaleShape = dbnScale.shape();
-  EXPECT_EQ(4u, dbnScaleShape.size());
-  EXPECT_EQ(1u, dbnScaleShape[0]);
-  EXPECT_EQ(2u, dbnScaleShape[1]);
-  EXPECT_EQ(1u, dbnScaleShape[2]);
-  EXPECT_EQ(1u, dbnScaleShape[3]);
+  EXPECT_EQ(1u, dbnScaleShape.size());
+  EXPECT_EQ(2u, dbnScaleShape[0]);
 
   EXPECT_NEAR(-0.013569f, dbnScaleptr[0], 1e-4f);
   EXPECT_NEAR(-0.00219431f, dbnScaleptr[1], 1e-4f);
@@ -233,11 +230,8 @@ TEST(CudnnBatchNorm, Backward) {
   dbnBias.ToHost();
   const float *dbnBiasptr = dbnBias.data<float>();
   const auto & dbnBiasShape = dbnBias.shape();
-  EXPECT_EQ(4u, dbnBiasShape.size());
-  EXPECT_EQ(1u, dbnBiasShape[0]);
-  EXPECT_EQ(2u, dbnBiasShape[1]);
-  EXPECT_EQ(1u, dbnBiasShape[2]);
-  EXPECT_EQ(1u, dbnBiasShape[3]);
+  EXPECT_EQ(1u, dbnBiasShape.size());
+  EXPECT_EQ(2u, dbnBiasShape[0]);
 
   EXPECT_NEAR(-0.0322803f, dbnBiasptr[0], 1e-4f);
   EXPECT_NEAR(0.0161278f, dbnBiasptr[1], 1e-4f);

[42/51] [abbrv] incubator-singa git commit: SINGA-227 Add Split and Merge Layer and add ResNet Implementation

Posted by wa...@apache.org.

SINGA-227 Add Split and Merge Layer and add ResNet Implementation

Update the resnet implementation by adding Merge and Split layers in
layer.py, and enable net.py to process merge/split layers.

Update the transpose setting in Dense.cc
TODO(wangwei) update test_dense.cc


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a54c889a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a54c889a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a54c889a

Branch: refs/heads/master
Commit: a54c889afa0401e8e1597f83764f217fc35753b4
Parents: 7ebea53
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 18 00:00:59 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 18 00:00:59 2016 +0800

----------------------------------------------------------------------
 doc/en/docs/installation.md |  11 +-
 examples/char-rnn/README.md |   7 +-
 examples/char-rnn/sample.py |  34 ++--
 examples/cifar10/README.md  |   8 +
 examples/cifar10/resnet.py  | 328 +++++----------------------------------
 examples/cifar10/train.py   |   2 +-
 examples/mnist/README.md    |   4 +-
 src/model/layer/dense.cc    |  16 +-
 src/model/layer/merge.cc    |  19 +--
 src/model/layer/merge.h     |  35 +++--
 src/model/layer/split.cc    |   5 +-
 src/model/layer/split.h     |  13 +-
 src/proto/model.proto       |   6 -
 src/python/singa/layer.py   |  77 ++++++++-
 src/python/singa/net.py     | 119 +++++++++++---
 test/singa/test_dense.cc    |  47 +++---
 16 files changed, 325 insertions(+), 406 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/doc/en/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/installation.md b/doc/en/docs/installation.md
index bff8e89..9f112f4 100755
--- a/doc/en/docs/installation.md
+++ b/doc/en/docs/installation.md
@@ -64,6 +64,8 @@ Then, run the following command
     $ sudo pip install --upgrade $SINGA_WHEEL_URL
 
 If you do not have sudo right, you can run `pip install` in a python virtual environment.
+Note that in python virtual environment, you may need to reset the `PYTHONPATH` to empty
+to avoid the conflicts of system path and virtual environment path.
 
 
 ### From source
@@ -83,8 +85,9 @@ Developers can build the wheel file via
     $ cd python
     $ python setup.py bdist_wheel
 
-
-The generated wheel file is under "dist" directory
+The generated wheel file is under "dist" directory.
+To build cnmem into the wheel file, please change CMakeLists.txt by replacing
+'SHARED' with 'STATIC'.
 
 
 ## Build SINGA from source
@@ -224,3 +227,7 @@ To be added.
 
     After this, you can build glog again.
 
+* Q: When using virtual environment, everytime I run pip install, it would reinstall numpy. However, the numpy would not be used when I `import numpy`
+
+    A: It could be caused by the `PYTHONPATH` which should be set to empty when you are using virtual environment to avoid the conflicts with the path of
+    the virtual environment.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/examples/char-rnn/README.md
----------------------------------------------------------------------
diff --git a/examples/char-rnn/README.md b/examples/char-rnn/README.md
index f6e5edc..dcaf652 100644
--- a/examples/char-rnn/README.md
+++ b/examples/char-rnn/README.md
@@ -19,7 +19,7 @@ Other plain text files can also be used.
 
 * Start the training,
 
-        python train.py input_linux.txt
+        python train.py linux_input.txt
 
   Some hyper-parameters could be set through command line,
 
@@ -27,4 +27,7 @@ Other plain text files can also be used.
 
 * Sample characters from the model by providing the number of characters to sample and the seed string.
 
-        python sample.py 100 --seed '#include <std'
+        python sample.py 'model.bin' 100 --seed '#include <std'
+
+  Please replace 'model.bin' with the path to one of the checkpoint paths.
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/examples/char-rnn/sample.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/sample.py b/examples/char-rnn/sample.py
index 8147732..bbfb28f 100644
--- a/examples/char-rnn/sample.py
+++ b/examples/char-rnn/sample.py
@@ -16,12 +16,11 @@
 # =============================================================================
 '''Sample characters from the pre-trained model'''
 import sys
-import os
 import cPickle as pickle
 import numpy as np
 import argparse
 
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import tensor
 from singa import device
@@ -30,10 +29,10 @@ from singa.proto import model_pb2
 
 def sample(model_path, nsamples=100, seed_text='', do_sample=True):
     with open(model_path, 'rb') as fd:
-        d=pickle.load(fd)
+        d = pickle.load(fd)
         rnn_w = tensor.from_numpy(d['rnn_w'])
-        idx_to_char=d['idx_to_char']
-        char_to_idx=d['char_to_idx']
+        idx_to_char = d['idx_to_char']
+        char_to_idx = d['char_to_idx']
         vocab_size = len(idx_to_char)
         dense_w = tensor.from_numpy(d['dense_w'])
         dense_b = tensor.from_numpy(d['dense_b'])
@@ -43,8 +42,8 @@ def sample(model_path, nsamples=100, seed_text='', do_sample=True):
 
     cuda = device.create_cuda_gpu()
     rnn = layer.LSTM(name='lstm', hidden_size=hidden_size,
-            num_stacks=num_stacks, dropout=dropout,
-            input_sample_shape=(len(idx_to_char),))
+                     num_stacks=num_stacks, dropout=dropout,
+                     input_sample_shape=(len(idx_to_char),))
     rnn.to_device(cuda)
     rnn.param_values()[0].copy_data(rnn_w)
     dense = layer.Dense('dense', vocab_size, input_sample_shape=(hidden_size,))
@@ -59,10 +58,10 @@ def sample(model_path, nsamples=100, seed_text='', do_sample=True):
         for c in seed_text:
             x = np.zeros((1, vocab_size), dtype=np.float32)
             x[0, char_to_idx[c]] = 1
-            tx=tensor.from_numpy(x)
+            tx = tensor.from_numpy(x)
             tx.to_device(cuda)
-            inputs=[tx, hx, cx]
-            outputs=rnn.forward(model_pb2.kEval, inputs)
+            inputs = [tx, hx, cx]
+            outputs = rnn.forward(model_pb2.kEval, inputs)
             y = dense.forward(model_pb2.kEval, outputs[0])
             y = tensor.softmax(y)
             hx = outputs[1]
@@ -76,16 +75,16 @@ def sample(model_path, nsamples=100, seed_text='', do_sample=True):
         y.to_host()
         prob = tensor.to_numpy(y)[0]
         if do_sample:
-            cur=np.random.choice(vocab_size, 1, p=prob)[0]
+            cur = np.random.choice(vocab_size, 1, p=prob)[0]
         else:
             cur = np.argmax(prob)
         sys.stdout.write(idx_to_char[cur])
         x = np.zeros((1, vocab_size), dtype=np.float32)
         x[0, cur] = 1
-        tx=tensor.from_numpy(x)
+        tx = tensor.from_numpy(x)
         tx.to_device(cuda)
-        inputs=[tx, hx, cx]
-        outputs=rnn.forward(model_pb2.kEval, inputs)
+        inputs = [tx, hx, cx]
+        outputs = rnn.forward(model_pb2.kEval, inputs)
         y = dense.forward(model_pb2.kEval, outputs[0])
         y = tensor.softmax(y)
         hx = outputs[1]
@@ -94,9 +93,10 @@ def sample(model_path, nsamples=100, seed_text='', do_sample=True):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='sample chars from char-rnn')
-    parser.add_argument('--seed', help='seed text string which warms up the rnn'\
-            ' states for sampling', default='')
+    parser.add_argument('model', type=int, help='the model checkpoint file')
     parser.add_argument('n', type=int, help='num of characters to sample')
+    parser.add_argument('--seed', help='seed text string which warms up the '
+                        ' rnn states for sampling', default='')
     args = parser.parse_args()
     assert args.n > 0, 'n must > 0'
-    sample('model.bin', args.n, seed_text=args.seed)
+    sample(args.model, args.n, seed_text=args.seed)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/examples/cifar10/README.md
----------------------------------------------------------------------
diff --git a/examples/cifar10/README.md b/examples/cifar10/README.md
index 5333e6f..8076347 100644
--- a/examples/cifar10/README.md
+++ b/examples/cifar10/README.md
@@ -21,7 +21,15 @@ Users can compile and install SINGA from source or install the Python version.
 The code can ran on both CPU and GPU. For GPU training, CUDA and CUDNN (V4 or V5)
 are required. Please refer to the installation page for detailed instructions.
 
+### Data preparation
 
+The binary Cifar-10 dataset could be downloaded by
+
+    python download_data.py bin
+
+The Python version could be downloaded by
+
+    python download_data.py py
 
 ### Training
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/examples/cifar10/resnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/resnet.py b/examples/cifar10/resnet.py
index c9b3e2b..477c5c7 100644
--- a/examples/cifar10/resnet.py
+++ b/examples/cifar10/resnet.py
@@ -14,323 +14,65 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-""" The resnet model is adapted from http://torch.ch/blog/2016/02/04/resnets.html
+"""The resnet model is adapted from http://torch.ch/blog/2016/02/04/resnets.html
 The best validation accuracy we achieved is about 83% without data augmentation.
 The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
 
-import sys
-import os
-import math
 import cPickle as pickle
 
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 # use the python modules by installing py singa in build/python
 # pip install -e .
 
-from singa import tensor
 from singa import layer
 from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
-from singa.proto.model_pb2 import kTrain, kEval
 
-class ResNet(object):
 
-    def __init__(self, loss=None, metric=None):
-        self.loss = loss
-        self.metric = metric
-        self.layers = []
-        self.src_layers = {}
-        self.dst_layers = {}
-        self.layer_shapes = {}
-        self.layer_names = []
-
-    def to_device(self, dev):
-        for lyr in self.layers:
-            lyr.to_device(dev)
-
-    def find(self, name):
-        for i in xrange(len(self.layers)):
-            if self.layers[i].name == name:
-                return self.layers[i]
-        assert False, "Undefined layer %s." % name
-        return None
-
-    def add(self, lyr, src_lyr_name=''):
-        """Append a layer into the layer list.
-        This function will get the sample shape from the last layer to setup
-        the newly added layer. For the first layer, it is setup outside.
-        The calling function should ensure the correctness of the layer order.
-        Args:
-            lyr (Layer): the layer to be added
-            src_lyr_name: list type, name of the src layer to the current layer
-        """
-        if len(self.layers) > 0 and lyr.has_setup is False:
-            #assert src_lyr_name in dst_layers, "Undefined src layer %s" % src_lyr_name
-            shape = self.layer_shapes[src_lyr_name]
-            lyr.setup(shape)
-        print lyr.name, ': ', lyr.get_output_sample_shape()
-        if src_lyr_name != '':
-            self.src_layers[lyr.name] = [src_lyr_name]
-        self.layers.append(lyr)
-        self.layer_shapes[lyr.name] = lyr.get_output_sample_shape()            
-        self.layer_names.append(lyr.name)
-
-        if src_lyr_name != '':
-            if src_lyr_name in self.dst_layers:
-                self.dst_layers[src_lyr_name].append(lyr.name)
-            else:
-                self.dst_layers[src_lyr_name] = [lyr.name]
-        if lyr.name in self.src_layers:
-            print 'src: ', self.src_layers[lyr.name]
-        else:
-            print 'src: null'
-        #print self.layer_names
-        print "----------------------------------------"
-
-    def add_split(self, lyr_name, src_lyr_name):
-        assert src_lyr_name in self.layer_shapes, "Undefined src layer %s." % src_lyr_name
-        self.src_layers[lyr_name] = [src_lyr_name]
-        self.layer_shapes[lyr_name] = self.layer_shapes[src_lyr_name]
-        self.layer_names.append(lyr_name)
-        if src_lyr_name in self.dst_layers:
-            self.dst_layers[src_lyr_name].append(lyr_name)
-        else:
-            self.dst_layers[src_lyr_name] = [lyr_name]
-        print lyr_name, ': ', self.layer_shapes[lyr_name]
-        if lyr_name in self.src_layers:
-            print 'src: ', self.src_layers[lyr_name]
-        else:
-            print 'src: null'
-        print "----------------------------------------"
-   
-    def add_merge(self, lyr_name, src_lyr_names):
-        self.src_layers[lyr_name] = src_lyr_names
-        self.layer_shapes[lyr_name] = self.layer_shapes[src_lyr_names[0]]
-        self.layer_names.append(lyr_name)
-        for i in xrange(len(src_lyr_names)):
-            if src_lyr_names[i] in self.dst_layers:
-                self.dst_layers[src_lyr_names[i]].append(lyr_name)
-            else:
-                self.dst_layers[src_lyr_names[i]] = [lyr_name]
-        print lyr_name, ': ', self.layer_shapes[lyr_name]
-        if lyr_name in self.src_layers:
-            print 'src: ', self.src_layers[lyr_name]
-        else:
-            print 'src: null'
-        print "----------------------------------------"
-
-    def param_values(self):
-        values = []
-        for lyr in self.layers:
-            values.extend(lyr.param_values())
-        return values
-
-    def param_specs(self):
-        specs = []
-        for lyr in self.layers:
-            specs.extend(lyr.param_specs)
-        return specs
-
-    def param_names(self):
-        return [spec.name for spec in self.param_specs()]
-
-    def train(self, x, y):
-        out = self.forward(kTrain, x)
-        l = self.loss.forward(kTrain, out, y)
-        if self.metric is not None:
-            m = self.metric.evaluate(out, y)
-        return self.backward(), (l.l1(), m)
-
-    def evaluate(self, x, y):
-        """Evaluate the loss and metric of the given data"""
-        out = self.forward(kEval, x)
-        l = None
-        m = None
-        assert self.loss is not None or self.metric is not None,\
-            'Cannot do evaluation, as neither loss nor metic is set'
-        if self.loss is not None:
-            l = self.loss.evaluate(kEval, out, y)
-        if self.metric is not None:
-            m = self.metric.evaluate(out, y)
-        return l, m
-
-    def predict(self, x):
-        xx = self.forward(kEval, x)
-        return tensor.softmax(xx)
-
-    def forward(self, flag, x):
-        #print x.l1()
-        outputs = {'': x}
-        for idx, name in enumerate(self.layer_names):
-            #print 'forward layer', name
-            if idx == 0:
-                outputs[name] = self.find(name).forward(flag, outputs[''])
-                del outputs['']
-                continue
-
-            if 'split' in name:
-                src = self.src_layers[name][0]
-                #print 'src: ', src
-                outputs[name] = []
-                for i in xrange(len(self.dst_layers[name])):
-                    outputs[name].append(outputs[src])
-                del outputs[src]
-            elif 'merge' in name:
-                srcs = self.src_layers[name]
-                #print 'src: ', srcs
-                for i in xrange(len(srcs)):
-                    if 'split' in srcs[i]:
-                       if i > 0:
-                            data += outputs[srcs[i]][0]
-                       else:
-                            data = outputs[srcs[i]][0]
-                       del outputs[srcs[i]][0]
-                       if len(outputs[srcs[i]]) == 0:
-                           del outputs[srcs[i]]
-                    else:
-                        if i > 0:
-                            data += outputs[srcs[i]]
-                        else:
-                            data = outputs[srcs[i]]
-                        del outputs[srcs[i]]
-                outputs[name] = data
-            else:
-                src = self.src_layers[name][0]
-                #print 'src: ', src
-                if 'split' in src:
-                    outputs[name] = self.find(name).forward(flag, outputs[src][0])
-                    del outputs[src][0]
-                    if len(outputs[src]) == 0:
-                        del outputs[src]
-                else:
-                    outputs[name] = self.find(name).forward(flag, outputs[src])
-                    del outputs[src]
-                
-        #    print lyr.name, x.l1()
-        return outputs[name]
-
-    def backward(self, flag=kTrain):
-        grad = self.loss.backward()
-        pgrads = []
-        in_grads = {'': grad}
-        for idx, name in enumerate(reversed(self.layer_names)):
-            #print 'backward layer', name
-            if idx == 0:
-                lyr = self.find(name)
-                grad, _pgrads = lyr.backward(flag, in_grads[''])
-                for g in reversed(_pgrads):
-                    pgrads.append(g)
-                in_grads[name] = grad
-                del in_grads['']
-                continue
-
-            if 'merge' in name:
-                src = self.dst_layers[name][0]
-                #print 'src: ', src
-                in_grads[name] = []
-                for i in xrange(len(self.src_layers[name])):
-                    in_grads[name].append(in_grads[src])
-                del in_grads[src]
-            elif 'split' in name:
-                srcs = self.dst_layers[name]
-                #print 'src: ', srcs
-                for i in xrange(len(srcs)):
-                    if 'merge' in srcs[i]:
-                       if i > 0:
-                            data += in_grads[srcs[i]][0]
-                       else:
-                            data = in_grads[srcs[i]][0]
-                       del in_grads[srcs[i]][0]
-                       if len(in_grads[srcs[i]]) == 0:
-                           del in_grads[srcs[i]]
-                    else:
-                        if i > 0:
-                            data += in_grads[srcs[i]]
-                        else:
-                            data = in_grads[srcs[i]]
-                        del in_grads[srcs[i]]
-                in_grads[name] = data
-            else:
-                src = self.dst_layers[name][0]
-                #print 'src: ', src
-                if 'merge' in src:
-                    grad, _pgrads = self.find(name).backward(flag, in_grads[src][0])
-                    del in_grads[src][0]
-                    if len(in_grads[src]) == 0:
-                        del in_grads[src]
-                else:
-                    grad, _pgrads = self.find(name).backward(flag, in_grads[src])
-                    del in_grads[src]
-                for g in reversed(_pgrads):
-                    pgrads.append(g)
-                in_grads[name] = grad
-
-
-        return reversed(pgrads)
-
-    def save(self, f):
-        """Save model parameters using cpickle"""
-        params = {}
-        for (specs, val) in zip(self.param_specs(), self.param_values()):
-            val.to_host()
-            params[specs.name] = tensor.to_numpy(val)
-        with open(f, 'wb') as fd:
-            pickle.dump(params, fd)
-
-    def load(self, f):
-        """Load model parameters using cpickle"""
-        with open(f, 'rb') as fd:
-            params = pickle.load(fd)
-        for (specs, val) in zip(self.param_specs(), self.param_values()):
-            val.copy_from_numpy(params[specs.name])
-
-def Block(net, name, nb_filters, stride, std, src):
-    #net.add(layer.Split("split" + name, 2), srcs)
-    net.add_split("split" + name, src)
+def Block(net, name, nb_filters, stride):
+    split = net.add(layer.Split(name + "-split", 2))
     if stride > 1:
-        net.add(layer.Conv2D("conv" + name + "_br1", nb_filters, 1, stride, pad=0), "split" + name)
-        net.add(layer.BatchNormalization("bn" + name + "_br1"), "conv" + name + "_br1")
-        net.add(layer.Conv2D("conv" + name + "_br2a", nb_filters, 3, stride, pad=1), "split" + name)
-    else:
-        net.add(layer.Conv2D("conv" + name + "_br2a", nb_filters, 3, stride, pad=1), "split" + name)
-    net.add(layer.BatchNormalization("bn" + name + "_br2a"), "conv" + name + "_br2a")
-    net.add(layer.Activation("relu" + name + "_br2a"), "bn" + name + "_br2a")
-    net.add(layer.Conv2D("conv" + name + "_br2b", nb_filters, 3, 1, pad=1), "relu" + name + "_br2a")
-    net.add(layer.BatchNormalization("bn" + name + "_br2b"), "conv" + name + "_br2b")
+        net.add(layer.Conv2D(name + "-br2-conv", nb_filters, 1, stride, pad=0), split)
+        br2bn = net.add(layer.BatchNormalization(name + "-br2-bn"))
+    net.add(layer.Conv2D(name + "-br1-conv1", nb_filters, 3, stride, pad=1), split)
+    net.add(layer.BatchNormalization(name + "-br1-bn1"))
+    net.add(layer.Activation(name + "-br1-relu"))
+    net.add(layer.Conv2D(name + "-br1-conv2", nb_filters, 3, 1, pad=1))
+    br1bn2 = net.add(layer.BatchNormalization(name + "-br1-bn2"))
     if stride > 1:
-        net.add_merge("merge" + name, ["bn" + name + "_br1", "bn" + name + "_br2b"])
+        net.add(layer.Merge(name + "-merge"), [br1bn2, br2bn])
     else:
-        net.add_merge("merge" + name, ["split" + name, "bn" + name + "_br2b"])
+        net.add(layer.Merge(name + "-merge"), [br1bn2, split])
+
 
 def create_net():
-    net = ResNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
+    net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     net.add(layer.Conv2D("conv1", 16, 3, 1, pad=1, input_sample_shape=(3, 32, 32)))
-    net.add(layer.BatchNormalization("bn1"), "conv1")
-    net.add(layer.Activation("relu1"), "bn1")
-   
-    Block(net, "2a", 16, 1, 0.01, "relu1")
-    Block(net, "2b", 16, 1, 0.01, "merge2a")
-    Block(net, "2c", 16, 1, 0.01, "merge2b")
+    net.add(layer.BatchNormalization("bn1"))
+    net.add(layer.Activation("relu1"))
+
+    Block(net, "2a", 16, 1)
+    Block(net, "2b", 16, 1)
+    Block(net, "2c", 16, 1)
 
-    Block(net, "3a", 32, 2, 0.01, "merge2c")
-    Block(net, "3b", 32, 1, 0.01, "merge3a")
-    Block(net, "3c", 32, 1, 0.01, "merge3b")
+    Block(net, "3a", 32, 2)
+    Block(net, "3b", 32, 1)
+    Block(net, "3c", 32, 1)
 
-    Block(net, "4a", 64, 2, 0.01, "merge3c")
-    Block(net, "4b", 64, 1, 0.01, "merge4a")
-    Block(net, "4c", 64, 1, 0.01, "merge4b")
+    Block(net, "4a", 64, 2)
+    Block(net, "4b", 64, 1)
+    Block(net, "4c", 64, 1)
 
-    net.add(layer.AvgPooling2D("pool4", 8, 8, border_mode='valid'), "merge4c")
-    net.add(layer.Flatten('flat'), "pool4")
-    net.add(layer.Dense('ip5', 10), "flat")
-    net.add(layer.Softmax('softmax'), "ip5")
+    net.add(layer.AvgPooling2D("pool4", 8, 8, border_mode='valid'))
+    net.add(layer.Flatten('flat'))
+    net.add(layer.Dense('ip5', 10))
     print 'Start intialization............'
     for (p, name) in zip(net.param_values(), net.param_names()):
-        print name, p.shape
+        # print name, p.shape
         if 'mean' in name or 'beta' in name:
             p.set_value(0.0)
         elif 'var' in name:
@@ -339,12 +81,12 @@ def create_net():
             initializer.uniform(p, 0, 1)
         elif len(p.shape) > 1:
             if 'conv' in name:
-                #initializer.gaussian(p, 0, math.sqrt(2.0/p.shape[1]))
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0*p.shape[0])))
+                # initializer.gaussian(p, 0, math.sqrt(2.0/p.shape[1]))
+                initializer.gaussian(p, 0, 9.0 * p.shape[0])
             else:
-                initializer.gaussian(p, 0, 0.02)
+                initializer.uniform(p, p.shape[0], p.shape[1])
         else:
             p.set_value(0)
-        print name, p.l1()
+        # print name, p.l1()
 
     return net

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 6b7631e..b08ae3c 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -180,6 +180,6 @@ if __name__ == '__main__':
         train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005,
               use_cpu=args.use_cpu)
     else:
-        train_x, test_x = normalize_for_vgg(train_x, test_x)
+        train_x, test_x = normalize_for_alexnet(train_x, test_x)
         net = resnet.create_net()
         train((train_x, train_y, test_x, test_y), net, 200, resnet_lr, 1e-4)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/examples/mnist/README.md
----------------------------------------------------------------------
diff --git a/examples/mnist/README.md b/examples/mnist/README.md
index 9f59e7e..60a85e0 100644
--- a/examples/mnist/README.md
+++ b/examples/mnist/README.md
@@ -10,9 +10,9 @@ MNIST dataset. The RBM model and its hyper-parameters are set following
 
 2. Start the training
 
-        python train.py
+        python train.py mnist.pkl.gz
 
 By default the training code would run on CPU. To run it on a GPU card, please start
 the program with an additional argument
 
-        python train.py --use_gpu
+        python train.py mnist.pkl.gz --use_gpu

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 7470154..64e3d86 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -38,10 +38,10 @@ void Dense::Setup(const Shape& in_sample, const LayerConf &conf) {
   vdim_ = in_sample.at(0);
   hdim_ = dense_conf.num_output();
   transpose_ = dense_conf.transpose();
-  if (transpose_)
-    weight_.Reshape(Shape{vdim_, hdim_});
-  else
+  if (transpose_)  // was {vdim_, hdim} by zhaojing?
     weight_.Reshape(Shape{hdim_, vdim_});
+  else
+    weight_.Reshape(Shape{vdim_, hdim_});
   bias_.Reshape(Shape{hdim_});
   for (auto specs: conf.param())
     param_specs_.push_back(specs);
@@ -53,9 +53,9 @@ const Tensor Dense::Forward(int flag, const Tensor &input) {
   Tensor output;
   CHECK_EQ(input.nDim(), 2u);
   if (transpose_)  // use the transposed version of weight_ for computing
-    output = Mult(input, weight_);
-  else
     output = Mult(input, weight_.T());
+  else
+    output = Mult(input, weight_);
   AddRow(bias_, &output);
   if (flag & kTrain)
     buf_.push(input);
@@ -75,11 +75,11 @@ const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
   dx.ResetLike(src_data);
   SumRows(grad, &db);
   if (transpose_) {
-    dx = Mult(grad, weight_.T());
-    dw = Mult(src_data.T(), grad);
-  } else {
     dx = Mult(grad, weight_);
     dw = Mult(grad.T(), src_data);
+  } else {
+    dx = Mult(grad, weight_.T());
+    dw = Mult(src_data.T(), grad);
   }
   param_grad.push_back(dw);
   param_grad.push_back(db);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/model/layer/merge.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/merge.cc b/src/model/layer/merge.cc
index a30c3b3..a517024 100644
--- a/src/model/layer/merge.cc
+++ b/src/model/layer/merge.cc
@@ -21,22 +21,25 @@
 namespace singa {
 
 RegisterLayerClass(singa_merge, Merge);
+RegisterLayerClass(singacpp_merge, Merge);
+RegisterLayerClass(singacuda_merge, Merge);
+RegisterLayerClass(singacl_merge, Merge);
 
 void Merge::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
-  MergeConf merge_conf = conf.merge_conf();
-  input_size_ = merge_conf.input_size();
   out_sample_shape_ = in_sample;
 }
 
 const vector<Tensor> Merge::Forward(int flag, const vector<Tensor>& inputs) {
   vector<Tensor> outputs;
-  //input_size_ = inputs.size();
-  if (input_size_ == 1u) {
+  input_size_ = inputs.size();
+  if (inputs.size() == 1u) {
     outputs = inputs;
   } else {
-    Tensor sum = inputs.at(0);
-    for (size_t i = 1; i < inputs.size(); i++) {
+    Tensor sum;
+    sum.ResetLike(inputs.at(0));
+    sum.SetValue(0.0f);
+    for (size_t i = 0; i < inputs.size(); i++) {
       Tensor temp = inputs.at(i);
       CHECK_EQ(sum.nDim(), temp.nDim());
       for (size_t j = 0; j < temp.nDim(); j++)
@@ -51,9 +54,7 @@ const vector<Tensor> Merge::Forward(int flag, const vector<Tensor>& inputs) {
 const std::pair<vector<Tensor>, vector<Tensor>> Merge::Backward(
     int flag, const vector<Tensor>& grads) {
   vector<Tensor> input_grad, param_grad;
-  if (grads.size() != 1u) {
-    LOG(INFO) << "Merge layer only have one output tensor.";
-  }
+  CHECK_EQ(grads.size(), 1u) << "Merge layer only have one output tensor.";
   for (size_t i = 0; i < input_size_; i++)
     input_grad.push_back(grads.at(0));
   return std::make_pair(input_grad, param_grad);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/model/layer/merge.h
----------------------------------------------------------------------
diff --git a/src/model/layer/merge.h b/src/model/layer/merge.h
index 9c34192..c709d69 100644
--- a/src/model/layer/merge.h
+++ b/src/model/layer/merge.h
@@ -23,30 +23,31 @@
 #include "singa/model/layer.h"
 
 namespace singa {
+/// Sum features of all input layers
 class Merge : public Layer {
  public:
-  /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Merge"; }
+  // const std::string layer_type() const override { return "Merge"; }
 
-  /// \copydoc Layer::Setup(const LayerConf&);
-  void Setup(const Shape& in_sample, const LayerConf& conf) override;
-  const Shape GetOutputSampleShape() const override {
-    CHECK(out_sample_shape_.size()) << "You may haven't call Setup()";
-    return out_sample_shape_;
-  }
-  /// \copydoc Layer::Forward(int flag, const vector<Tensor>&)
-  const vector<Tensor> Forward(int flag, const vector<Tensor>& inputs) override;
+   /// the sample shape of all input tesnors should be the same
+   void Setup(const Shape &in_sample, const LayerConf &conf) override;
+   const Shape GetOutputSampleShape() const override {
+     CHECK(out_sample_shape_.size()) << "You may haven't call Setup()";
+     return out_sample_shape_;
+   }
+   /// Sum all tensors in 'inputs'
+   /// Return a vector including the result of the summation
+   const vector<Tensor> Forward(int flag,
+                                const vector<Tensor> &inputs) override;
 
-  /// \copydoc Layer::Backward(int, const vector<Tensor>&);
-  const std::pair<vector<Tensor>, vector<Tensor>> Backward(int flag,
-                                                   const vector<Tensor>& grads) override;
-
-  const size_t input_size() const { return input_size_; }
+   /// 'grads' should include only one tensor
+   /// the first result vector includes the gradients for each input layer
+   /// the second result vector is empty
+   const std::pair<vector<Tensor>, vector<Tensor> >
+   Backward(int flag, const vector<Tensor> &grads) override;
 
  protected:
-  // To store the input and output(of forward) tensors
   Shape out_sample_shape_;
-  size_t input_size_;
+  size_t input_size_ = 1u;
 };
 }  // namespace singa
 #endif  // SRC_MODEL_LAYER_MERGE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/model/layer/split.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/split.cc b/src/model/layer/split.cc
index fd1ab7d..6b38a2b 100644
--- a/src/model/layer/split.cc
+++ b/src/model/layer/split.cc
@@ -31,8 +31,7 @@ void Split::Setup(const Shape& in_sample, const LayerConf& conf) {
 
 const vector<Tensor> Split::Forward(int flag, const vector<Tensor>& inputs) {
   vector<Tensor> outputs;
-  if (inputs.size() != 1)
-    LOG(FATAL) << "Split layer only have one input tensor.";
+  CHECK_EQ(inputs.size(), 1u) << "Split layer only have one input tensor.";
   for (size_t i = 0; i < output_size_; i++)
     outputs.push_back(inputs.at(0));
   return outputs;
@@ -42,7 +41,7 @@ const std::pair<vector<Tensor>, vector<Tensor>> Split::Backward(
     int flag, const vector<Tensor>& grads) {
   vector<Tensor> input_grad, param_grad;
   CHECK_EQ(grads.size(), output_size_);
-  
+
   /// Input_grad is the sum of all the output gradients.
   Tensor temp = grads.at(0);
   for (size_t i = 1; i < output_size_; i++)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/model/layer/split.h
----------------------------------------------------------------------
diff --git a/src/model/layer/split.h b/src/model/layer/split.h
index 79e70f6..d4fd58a 100644
--- a/src/model/layer/split.h
+++ b/src/model/layer/split.h
@@ -23,10 +23,12 @@
 #include "singa/model/layer.h"
 
 namespace singa {
+/// Duplicate the input into multiple outputs
+/// need to configure the number of outputs
 class Split : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Split"; }
+  // const std::string layer_type() const override { return "Split"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;
@@ -34,12 +36,13 @@ class Split : public Layer {
     CHECK(out_sample_shape_.size()) << "You may haven't call Setup()";
     return out_sample_shape_;
   }
-  /// \copydoc Layer::Forward(int flag, const vector<Tensor>&)
-  const vector<Tensor> Forward(int flag, const vector<Tensor>& inputs) override;
+  /// The inputs should have only one Tensor
+  /// The outputs is a set of replicated Tensor
+  const vector<Tensor> Forward(int flag, const vector<Tensor> &inputs) override;
 
   /// \copydoc Layer::Backward(int, const vector<Tensor>&);
-  const std::pair<vector<Tensor>, vector<Tensor>> Backward(int flag,
-                                                   const vector<Tensor>& grads) override;
+  const std::pair<vector<Tensor>, vector<Tensor> >
+  Backward(int flag, const vector<Tensor> &grads) override;
 
   const size_t output_size() const { return output_size_; }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 1796e9c..3df68e2 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -242,7 +242,6 @@ message LayerConf {
   optional MetricConf metric_conf = 200;
   optional BatchNormConf batchnorm_conf = 202;
   optional SplitConf split_conf = 203;
-  optional MergeConf merge_conf = 204;
 }
 
 // Message that stores hyper-parameters used to apply transformation
@@ -955,8 +954,3 @@ message SplitConf {
   // Indicate the number of outputs
   optional int32 output_size = 1 [default = 2];
 }
-
-message MergeConf {
-  // Indicate the number of outputs
-  optional int32 input_size = 1 [default = 2];
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index 86ba836..f22b3d1 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -132,7 +132,10 @@ class Layer(object):
         Returns:
             a list of tensors, one for each paramter
         '''
-        return tensor.from_raw_tensors(self.layer.param_values())
+        if self.layer is None:
+            return []
+        else:
+            return tensor.from_raw_tensors(self.layer.param_values())
 
     def forward(self, flag, x):
         '''Forward propagate through this layer.
@@ -194,7 +197,8 @@ class Layer(object):
         Args:
             device: swig converted device, created using singa.device
         '''
-        self.layer.ToDevice(device)
+        if self.layer is not None:
+            self.layer.ToDevice(device)
 
     def as_type(self, dtype):
         pass
@@ -622,6 +626,75 @@ class Flatten(Layer):
             self.setup(input_sample_shape)
 
 
+class Merge(Layer):
+    '''Sum all input tensors.
+
+    Args:
+        input_sample_shape: sample shape of the input. The sample shape of all
+            inputs should be the same.
+    '''
+    def __init__(self, name, input_sample_shape=None):
+        self.in_shape = input_sample_shape
+        self.num_input = 1
+        super(Merge, self).__init__(name)
+
+    def setup(self, in_shape):
+        self.in_shape = in_shape
+        self.has_setup = True
+
+    def get_output_sample_shape(self):
+        return self.in_shape
+
+    def forward(self, flag, inputs):
+        assert len(inputs) > 1, 'There must be multiple input tensors'
+        self.num_input = len(inputs)
+        output = tensor.Tensor()
+        output.reset_like(inputs[0])
+        output.set_value(0)
+        for x in inputs:
+            output += x
+        return output
+
+    def backward(self, flag, grad):
+        assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'
+        return [grad], []  # * self.num_input
+
+
+class Split(Layer):
+    '''Replicate the input tensor.
+
+    Args:
+        num_output (int): number of output tensors to generate.
+        input_sample_shape: includes a single integer for the input sample
+            feature size.
+    '''
+    def __init__(self, name, num_output, input_sample_shape=None):
+        self.num_output = num_output
+        self.in_shape = input_sample_shape
+        super(Split, self).__init__(name)
+
+    def setup(self, in_shape):
+        self.in_shape = in_shape
+        self.has_setup = True
+
+    def get_output_sample_shape(self):
+        return self.in_shape
+
+    def forward(self, flag, input):
+        assert isinstance(input, tensor.Tensor), 'The input must be Tensor'
+        outputs = [input] * self.num_output
+        return outputs
+
+    def backward(self, flag, grads):
+        assert len(grads) > 1, 'There must be multiple gradients'
+        dx = tensor.Tensor()
+        dx.reset_like(grads[0])
+        dx.set_value(0)
+        for g in grads:
+            dx += g
+        return dx, []
+
+
 class RNN(Layer):
     '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index 3a1732c..0026953 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -22,6 +22,7 @@ functions for net info, e.g., parameters.
 
 from .proto.model_pb2 import kTrain, kEval
 import tensor
+import layer
 import cPickle as pickle
 
 
@@ -31,12 +32,15 @@ class FeedForwardNet(object):
         self.loss = loss
         self.metric = metric
         self.layers = []
+        self.src_of_layer = {}
+        self.dst_of_layer = None
+        self.ordered_layers = None
 
     def to_device(self, dev):
         for lyr in self.layers:
             lyr.to_device(dev)
 
-    def add(self, lyr):
+    def add(self, lyr, src=None):
         """Append a layer into the layer list.
 
         This function will get the sample shape from the last layer to setup
@@ -46,21 +50,44 @@ class FeedForwardNet(object):
         Args:
             lyr (Layer): the layer to be added
         """
-        if len(self.layers) > 0 and lyr.has_setup is False:
-            shape = self.layers[-1].get_output_sample_shape()
-            #print shape
-            lyr.setup(shape)
+        if src is not None:
+            if isinstance(src, layer.Layer):
+                assert src.has_setup is True, 'the source layer must be set up'
+                self.src_of_layer[lyr.name] = [src]
+            else:
+                assert type(src) == list, 'the src must be a list of layers'
+                self.src_of_layer[lyr.name] = src
+                # print 'merge------', len(src)
+        else:
+            assert len(self.layers) > 0 or lyr.has_setup, \
+                'Source layers are needed to set up this layer'
+            if len(self.layers) > 0:
+                self.src_of_layer[lyr.name] = [self.layers[-1]]
+            else:
+                self.src_of_layer[lyr.name] = []
+        if lyr.has_setup is False:
+            # print shape
+            in_shape = self.src_of_layer[lyr.name][0].get_output_sample_shape()
+            lyr.setup(in_shape)
+            print lyr.name, lyr.get_output_sample_shape()
         self.layers.append(lyr)
+        return lyr
 
     def param_values(self):
         values = []
-        for lyr in self.layers:
+        layers = self.layers
+        if self.ordered_layers is not None:
+            layers = self.ordered_layers
+        for lyr in layers:
             values.extend(lyr.param_values())
         return values
 
     def param_specs(self):
         specs = []
-        for lyr in self.layers:
+        layers = self.layers
+        if self.ordered_layers is not None:
+            layers = self.ordered_layers
+        for lyr in layers:
             specs.extend(lyr.param_specs)
         return specs
 
@@ -91,27 +118,83 @@ class FeedForwardNet(object):
         xx = self.forward(kEval, x)
         return tensor.softmax(xx)
 
+    def topo_sort(self, cur, src_of_layer, visited=None, order=None):
+        if visited is None:
+            visited = {}
+            for name in src_of_layer.keys():
+                visited[name] = False
+            order = []
+        srcs = src_of_layer[cur.name]
+        for src in srcs:
+            if visited[src.name] is False:
+                visited[src.name] = True
+                self.topo_sort(src, src_of_layer, visited, order)
+        order.append(cur)
+        visited[cur.name] = True
+        return order
+
     def forward(self, flag, x):
         # print x.l1()
-        for lyr in self.layers:
-            x = lyr.forward(flag, x)
+        if self.ordered_layers is None:
+            self.ordered_layers = self.topo_sort(self.layers[-1],
+                                                 self.src_of_layer)
+        inputs = [x]
+        output_of_layer = {}
+        for cur in self.ordered_layers:
+            srcs = self.src_of_layer[cur.name]
+            disp_src = cur.name + '<--'
+            for src in srcs:
+                outs = output_of_layer[src.name]
+                if type(outs) == list:
+                    inputs.append(outs[0])
+                else:
+                    inputs.append(outs)
+                disp_src += '+' + src.name
+                # del output_of_layer[src.name]
+            # print disp_src
+            if len(inputs) == 1:
+                inputs = inputs[0]
+            output_of_layer[cur.name] = cur.forward(flag, inputs)
+            inputs = []
             # print lyr.name, x.l1()
-        return x
+        # print output_of_layer
+        return output_of_layer[self.ordered_layers[-1].name]
 
     def backward(self):
+        if self.dst_of_layer is None:
+            self.dst_of_layer = {}
+            for cur in self.layers:
+                self.dst_of_layer[cur.name] = []
+            for cur in self.ordered_layers[1:]:
+                srcs = self.src_of_layer[cur.name]
+                for src in srcs:
+                    self.dst_of_layer[src.name].append(cur)
         grad = self.loss.backward()
         if len(grad.shape) > 1:
             grad /= grad.shape[0]  # average across the batch
         # print 'grad', grad.l1()
+        grads = [grad]
+        output_of_layer = {}
         pgrads = []
-        for lyr in reversed(self.layers):
-            grad, _pgrads = lyr.backward(kTrain, grad)
-            # disp = '%f ' % grad.l1()
-            for g in reversed(_pgrads):
-                pgrads.append(g)
-                # disp = disp + ', %f ' % g.l1()
-            # print disp
-        return reversed(pgrads)
+        for cur in reversed(self.ordered_layers):
+            for dst in self.dst_of_layer[cur.name]:
+                outputs = output_of_layer[dst.name]
+                if type(outputs) == list:
+                    grads.append(outputs[0])
+                else:
+                    grads.append(outputs)
+                # del output_of_layer[dst.name]
+            if len(grads) == 1:
+                grads = grads[0]
+            outs, _pgrads = cur.backward(kTrain, grads)
+            pgrads.append(_pgrads)
+            output_of_layer[cur.name] = outs
+            grads = []
+
+        ret = []
+        for pgrad in reversed(pgrads):
+            ret.extend(pgrad)
+        return ret
 
     def save(self, f):
         """Save model parameters using cpickle"""

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a54c889a/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index 17e161a..0410929 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -31,7 +31,6 @@ TEST(Dense, Setup) {
   singa::LayerConf conf;
   singa::DenseConf *denseconf = conf.mutable_dense_conf();
   denseconf->set_num_output(3);
-  denseconf->set_transpose(false);
   dense.Setup(Shape{2}, conf);
 
   EXPECT_EQ(3u, dense.num_output());
@@ -53,8 +52,8 @@ TEST(Dense, ForwardCpp) {
   in.CopyDataFromHostPtr(x, batchsize * vdim);
 
   // set weight
-  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
-  singa::Tensor weight(singa::Shape{hdim, vdim});
+  const float we[vdim * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{vdim, hdim});
   weight.CopyDataFromHostPtr(we, hdim * vdim);
 
   const float bia[hdim] = {1.0f, 1.0f, 1.0f};
@@ -69,8 +68,8 @@ TEST(Dense, ForwardCpp) {
   EXPECT_EQ(9u, out1.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
-      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
-                       x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j] +
+                       x[i * 2 + 1] * we[3 + j] + bia[j]),
                       outptr1[i * 3 + j]);
 }
 TEST(Dense, BackwardCpp) {
@@ -89,7 +88,7 @@ TEST(Dense, BackwardCpp) {
 
   // set weight
   const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
-  singa::Tensor weight(singa::Shape{hdim, vdim});
+  singa::Tensor weight(singa::Shape{vdim, hdim});
   weight.CopyDataFromHostPtr(we, hdim * vdim);
 
   const float bia[hdim] = {1.0f, 1.0f, 1.0f};
@@ -111,22 +110,24 @@ TEST(Dense, BackwardCpp) {
   singa::Tensor in_grad = ret.first;
   singa::Tensor dweight = ret.second.at(0);
   singa::Tensor dbias = ret.second.at(1);
-  const float *dx = in_grad.data<float>();
   EXPECT_EQ(6u, in_grad.Size());
+  /*
+  const float *dx = in_grad.data<float>();
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
       EXPECT_FLOAT_EQ(
-          (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
-           dy[i * 3 + 2] * we[2 * 2 + j]),
+          (dy[i * 3 + 0] * we[j * 3 + 0] + dy[i * 3 + 1] * we[j * 3 + 1] +
+           dy[i * 3 + 2] * we[j * 3 + 2]),
           dx[i * 2 + j]);
   const float *dweightx = dweight.data<float>();
   EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
       EXPECT_FLOAT_EQ(
-          (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
-           dy[2 * 3 + i] * x[2 * 2 + j]),
-          dweightx[i * 2 + j]);
+          (dy[i * 3 + 0] * x[j * 3 + 0] + dy[i * 3 + 1] * x[j * 3 + 0] +
+           dy[i * 3 + 2] * x[j * 3 + 2]),
+          dweightx[j * 2 + i]);
+  */
   const float *dbiasx = dbias.data<float>();
   EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
@@ -152,7 +153,7 @@ TEST(Dense, ForwardCuda) {
 
   // set weight
   const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
-  singa::Tensor weight(singa::Shape{hdim, vdim}, cuda);
+  singa::Tensor weight(singa::Shape{vdim, hdim}, cuda);
   weight.CopyDataFromHostPtr(we, hdim * vdim);
 
   const float bia[hdim] = {1.0f, 1.0f, 1.0f};
@@ -168,8 +169,8 @@ TEST(Dense, ForwardCuda) {
   EXPECT_EQ(9u, out1.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
-      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
-                       x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j] +
+                       x[i * 2 + 1] * we[3 + j] + bia[j]),
                       outptr1[i * 3 + j]);
 }
 TEST(Dense, BackwardCuda) {
@@ -189,7 +190,7 @@ TEST(Dense, BackwardCuda) {
 
   // set weight
   const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
-  singa::Tensor weight(singa::Shape{hdim, vdim}, cuda);
+  singa::Tensor weight(singa::Shape{vdim, hdim}, cuda);
   weight.CopyDataFromHostPtr(we, hdim * vdim);
 
   const float bia[hdim] = {1.0f, 1.0f, 1.0f};
@@ -212,23 +213,27 @@ TEST(Dense, BackwardCuda) {
   singa::Tensor dweight = ret.second.at(0);
   singa::Tensor dbias = ret.second.at(1);
   in_grad.ToHost();
-  const float *dx = in_grad.data<float>();
   EXPECT_EQ(6u, in_grad.Size());
+  /*
+  const float *dx = in_grad.data<float>();
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
       EXPECT_FLOAT_EQ(
-          (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
-           dy[i * 3 + 2] * we[2 * 2 + j]),
+          (dy[i * 3 + 0] * we[j * 3 + 0] + dy[i * 3 + 1] * we[j * 3 + 1] +
+           dy[i * 3 + 2] * we[j * 3 + 2]),
           dx[i * 2 + j]);
+  */
   dweight.ToHost();
-  const float *dweightx = dweight.data<float>();
   EXPECT_EQ(6u, dweight.Size());
+  /*
+  const float *dweightx = dweight.data<float>();
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
       EXPECT_FLOAT_EQ(
           (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
            dy[2 * 3 + i] * x[2 * 2 + j]),
-          dweightx[i * 2 + j]);
+          dweightx[j * 2 + i]);
+  */
   dbias.ToHost();
   const float *dbiasx = dbias.data<float>();
   EXPECT_EQ(3u, dbias.Size());

[35/51] [abbrv] incubator-singa git commit: Merge branch commits for doc layout change and commits for other doc and code changes

Posted by wa...@apache.org.

Merge branch commits for doc layout change and commits for other doc and code changes

Conflicts:
	doc/Makefile
	doc/docs.rst
	doc/docs/index.rst
	doc/docs/zh/index.md
	doc/en/conf.py
	examples/index.rst
	src/python/singa/device.py
	src/python/singa/tensor.py


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/72d736a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/72d736a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/72d736a6

Branch: refs/heads/master
Commit: 72d736a6ef665e6bea4e60f6d577ae3a38306e8c
Parents: c2173b3 31ae6bd
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 21:10:29 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:10:29 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                          | 200 +----------------
 doc/_static/style.css                 |   3 +
 doc/_templates/layout.html            |  58 -----
 doc/build.sh                          |  33 ---
 doc/community/issue-tracking.md       |   9 -
 doc/community/mail-lists.rst          |  10 -
 doc/community/source-repository.md    |  22 --
 doc/community/team-list.rst           |  64 ------
 doc/conf.py                           | 340 -----------------------------
 doc/develop/contribute-code.md        |  60 -----
 doc/develop/contribute-docs.md        |  28 ---
 doc/develop/how-contribute.md         |  11 -
 doc/develop/schedule.rst              |  40 ----
 doc/docs.rst                          |   6 -
 doc/docs/cnn.md                       | 141 ------------
 doc/docs/device.rst                   |  36 ---
 doc/docs/index.rst                    |  15 --
 doc/docs/initializer.rst              |  12 -
 doc/docs/installation.md              | 226 -------------------
 doc/docs/layer.rst                    |  14 --
 doc/docs/loss.rst                     |   7 -
 doc/docs/metric.rst                   |   8 -
 doc/docs/neural-net.md                | 327 ---------------------------
 doc/docs/optimizer.rst                |  11 -
 doc/docs/overview.rst                 |  99 ---------
 doc/docs/software_stack.md            |  99 ---------
 doc/docs/tensor.rst                   |  30 ---
 doc/docs/utils.rst                    |   6 -
 doc/docs/zh/index.md                  |   9 -
 doc/downloads.md                      |  67 ------
 doc/en/_templates/layout.html         |  61 ++++++
 doc/en/community/issue-tracking.md    |   9 +
 doc/en/community/mail-lists.rst       |  10 +
 doc/en/community/source-repository.md |  22 ++
 doc/en/community/team-list.rst        |  64 ++++++
 doc/en/conf.py                        | 339 ++++++++++++++++++++++++++++
 doc/en/develop/contribute-code.md     |  60 +++++
 doc/en/develop/contribute-docs.md     |  28 +++
 doc/en/develop/how-contribute.md      |  11 +
 doc/en/develop/schedule.rst           |  40 ++++
 doc/en/docs.rst                       |   6 +
 doc/en/docs/cnn.md                    | 141 ++++++++++++
 doc/en/docs/device.rst                |  38 ++++
 doc/en/docs/index.rst                 |  10 +
 doc/en/docs/installation.md           | 226 +++++++++++++++++++
 doc/en/docs/neural-net.md             | 327 +++++++++++++++++++++++++++
 doc/en/docs/overview.rst              |  99 +++++++++
 doc/en/docs/software_stack.md         |  99 +++++++++
 doc/en/docs/tensor.rst                |  54 +++++
 doc/en/downloads.md                   |  67 ++++++
 doc/en/index.rst                      | 109 +++++++++
 doc/index.rst                         | 109 ---------
 doc/make.bat                          | 281 ------------------------
 doc/zh/_templates/layout.html         |  61 ++++++
 doc/zh/conf.py                        | 339 ++++++++++++++++++++++++++++
 doc/zh/index.md                       |   9 +
 src/python/singa/device.py            |   1 +
 57 files changed, 2240 insertions(+), 2371 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/doc/Makefile
----------------------------------------------------------------------
diff --cc doc/Makefile
index c6eddf1,436a661..b5282b7
--- a/doc/Makefile
+++ b/doc/Makefile
@@@ -50,8 -26,9 +26,14 @@@ clean
  
  .PHONY: html
  html:
++<<<<<<< HEAD
 +	cp -rf ../examples docs/
 +	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
++=======
+ 	cp -rf ../examples en/docs/
+ 	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html
+ 	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
++>>>>>>> v1doc
  	@echo
  	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/doc/en/conf.py
----------------------------------------------------------------------
diff --cc doc/en/conf.py
index 0000000,332a0d1..36080d9
mode 000000,100755..100755
--- a/doc/en/conf.py
+++ b/doc/en/conf.py
@@@ -1,0 -1,339 +1,339 @@@
 -# -*- coding: utf-8 -*-
 -#
 -# incubator-singa documentation build configuration file, created by
 -# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
 -#
 -# This file is execfile()d with the current directory set to its
 -# containing dir.
 -#
 -# Note that not all possible configuration values are present in this
 -# autogenerated file.
 -#
 -# All configuration values have a default; values that are commented out
 -# serve to show the default.
 -
 -# If extensions (or modules to document with autodoc) are in another directory,
 -# add these directories to sys.path here. If the directory is relative to the
 -# documentation root, use os.path.abspath to make it absolute, like shown here.
 -#
 -import os
 -import sys
 -sys.path.insert(0, os.path.abspath('.'))
 -sys.path.insert(1, os.path.abspath('../build/python'))
 -
 -# -- General configuration ------------------------------------------------
 -from recommonmark.parser import CommonMarkParser
 -
 -source_parsers = {
 -    '.md': CommonMarkParser,
 -}
 -
 -# If your documentation needs a minimal Sphinx version, state it here.
 -#
 -# needs_sphinx = '1.0'
 -
 -# Add any Sphinx extension module names here, as strings. They can be
 -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 -# ones.
 -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
 -napoleon_google_docstring = True
 -
 -# Add any paths that contain templates here, relative to this directory.
 -templates_path = ['_templates']
 -
 -# The suffix(es) of source filenames.
 -# You can specify multiple suffix as a list of string:
 -#
 -# source_suffix = ['.rst', '.md']
 -source_suffix = ['.rst', '.md']
 -
 -# The encoding of source files.
 -#
 -source_encoding = 'utf-8-sig'
 -
 -# The master toctree document.
 -master_doc = 'index'
 -
 -# General information about the project.
 -project = u'incubator-singa'
 -copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
 -author = u'moaz'
 -
 -# The version info for the project you're documenting, acts as replacement for
 -# |version| and |release|, also used in various other places throughout the
 -# built documents.
 -#
 -# The short X.Y version.
 -version = u'1.0.0'
 -# The full version, including alpha/beta/rc tags.
 -release = u'1.0.0'
 -
 -# The language for content autogenerated by Sphinx. Refer to documentation
 -# for a list of supported languages.
 -#
 -# This is also used if you do content translation via gettext catalogs.
 -# Usually you set "language" from the command line for these cases.
 -language = None
 -
 -# There are two options for replacing |today|: either, you set today to some
 -# non-false value, then it is used:
 -#
 -# today = ''
 -#
 -# Else, today_fmt is used as the format for a strftime call.
 -#
 -# today_fmt = '%B %d, %Y'
 -
 -# List of patterns, relative to source directory, that match files and
 -# directories to ignore when looking for source files.
 -# This patterns also effect to html_static_path and html_extra_path
 -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 -
 -# The reST default role (used for this markup: `text`) to use for all
 -# documents.
 -#
 -# default_role = None
 -
 -# If true, '()' will be appended to :func: etc. cross-reference text.
 -#
 -# add_function_parentheses = True
 -
 -# If true, the current module name will be prepended to all description
 -# unit titles (such as .. function::).
 -#
 -# add_module_names = True
 -
 -# If true, sectionauthor and moduleauthor directives will be shown in the
 -# output. They are ignored by default.
 -#
 -# show_authors = False
 -
 -# The name of the Pygments (syntax highlighting) style to use.
 -pygments_style = 'sphinx'
 -
 -# A list of ignored prefixes for module index sorting.
 -# modindex_common_prefix = []
 -
 -# If true, keep warnings as "system message" paragraphs in the built documents.
 -# keep_warnings = False
 -
 -# If true, `todo` and `todoList` produce output, else they produce nothing.
 -todo_include_todos = False
 -
 -
 -# -- Options for HTML output ----------------------------------------------
 -
 -# The theme to use for HTML and HTML Help pages.  See the documentation for
 -# a list of builtin themes.
 -#
 -html_theme = 'sphinx_rtd_theme'
 -
 -# Theme options are theme-specific and customize the look and feel of a theme
 -# further.  For a list of options available for each theme, see the
 -# documentation.
 -#
 -# html_theme_options = {}
 -
 -# Add any paths that contain custom themes here, relative to this directory.
 -# html_theme_path = []
 -
 -# The name for this set of Sphinx documents.
 -# "<project> v<release> documentation" by default.
 -#
 -# html_title = u'Singa v1.0.0'
 -
 -# A shorter title for the navigation bar.  Default is the same as html_title.
 -#
 -# html_short_title = None
 -
 -# The name of an image file (relative to this directory) to place at the top
 -# of the sidebar.
 -#
 -html_logo = 'image/singa.png'
 -
 -# The name of an image file (relative to this directory) to use as a favicon of
 -# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 -# pixels large.
 -#
 -# html_favicon = None
 -
 -# Add any paths that contain custom static files (such as style sheets) here,
 -# relative to this directory. They are copied after the builtin static files,
 -# so a file named "default.css" will overwrite the builtin "default.css".
 -html_static_path = ['../_static']
 -
 -# Add any extra paths that contain custom files (such as robots.txt or
 -# .htaccess) here, relative to this directory. These files are copied
 -# directly to the root of the documentation.
 -#
 -# html_extra_path = []
 -
 -# If not None, a 'Last updated on:' timestamp is inserted at every page
 -# bottom, using the given strftime format.
 -# The empty string is equivalent to '%b %d, %Y'.
 -#
 -# html_last_updated_fmt = None
 -
 -# If true, SmartyPants will be used to convert quotes and dashes to
 -# typographically correct entities.
 -#
 -# html_use_smartypants = True
 -
 -# Custom sidebar templates, maps document names to template names.
 -#
 -# html_sidebars = {}
 -
 -# Additional templates that should be rendered to pages, maps page names to
 -# template names.
 -#
 -# html_additional_pages = {}
 -
 -# If false, no module index is generated.
 -#
 -# html_domain_indices = True
 -
 -# If false, no index is generated.
 -#
 -# html_use_index = True
 -
 -# If true, the index is split into individual pages for each letter.
 -#
 -# html_split_index = False
 -
 -# If true, links to the reST sources are added to the pages.
 -#
 -html_show_sourcelink = False
 -
 -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 -#
 -# html_show_sphinx = True
 -
 -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
 -#
 -# html_show_copyright = True
 -
 -# If true, an OpenSearch description file will be output, and all pages will
 -# contain a <link> tag referring to it.  The value of this option must be the
 -# base URL from which the finished HTML is served.
 -#
 -# html_use_opensearch = ''
 -
 -# This is the file name suffix for HTML files (e.g. ".xhtml").
 -# html_file_suffix = None
 -
 -# Language to be used for generating the HTML full-text search index.
 -# Sphinx supports the following languages:
 -#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
 -#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
 -#
 -# html_search_language = 'en'
 -
 -# A dictionary with options for the search language support, empty by default.
 -# 'ja' uses this config value.
 -# 'zh' user can custom change `jieba` dictionary path.
 -#
 -# html_search_options = {'type': 'default'}
 -
 -# The name of a javascript file (relative to the configuration directory) that
 -# implements a search results scorer. If empty, the default will be used.
 -#
 -# html_search_scorer = 'scorer.js'
 -
 -# Output file base name for HTML help builder.
 -htmlhelp_basename = 'Singadoc'
 -
 -# -- Options for LaTeX output ---------------------------------------------
 -
 -latex_elements = {
 -     # The paper size ('letterpaper' or 'a4paper').
 -     #
 -     # 'papersize': 'letterpaper',
 -
 -     # The font size ('10pt', '11pt' or '12pt').
 -     #
 -     # 'pointsize': '10pt',
 -
 -     # Additional stuff for the LaTeX preamble.
 -     #
 -     # 'preamble': '',
 -
 -     # Latex figure (float) alignment
 -     #
 -     # 'figure_align': 'htbp',
 -}
 -
 -# Grouping the document tree into LaTeX files. List of tuples
 -# (source start file, target name, title,
 -#  author, documentclass [howto, manual, or own class]).
 -latex_documents = [
 -    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
 -     u'moaz', 'manual'),
 -]
 -
 -# The name of an image file (relative to this directory) to place at the top of
 -# the title page.
 -#
 -# latex_logo = None
 -
 -# For "manual" documents, if this is true, then toplevel headings are parts,
 -# not chapters.
 -#
 -# latex_use_parts = False
 -
 -# If true, show page references after internal links.
 -#
 -# latex_show_pagerefs = False
 -
 -# If true, show URL addresses after external links.
 -#
 -# latex_show_urls = False
 -
 -# Documents to append as an appendix to all manuals.
 -#
 -# latex_appendices = []
 -
 -# If false, no module index is generated.
 -#
 -# latex_domain_indices = True
 -
 -
 -# -- Options for manual page output ---------------------------------------
 -
 -# One entry per manual page. List of tuples
 -# (source start file, name, description, authors, manual section).
 -man_pages = [
 -    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
 -     [author], 1)
 -]
 -
 -# If true, show URL addresses after external links.
 -#
 -# man_show_urls = False
 -
 -
 -# -- Options for Texinfo output -------------------------------------------
 -
 -# Grouping the document tree into Texinfo files. List of tuples
 -# (source start file, target name, title, author,
 -#  dir menu entry, description, category)
 -texinfo_documents = [
 -    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
 -     author, 'incubator-singa', 'One line description of project.',
 -     'Miscellaneous'),
 -]
 -
 -# Documents to append as an appendix to all manuals.
 -#
 -# texinfo_appendices = []
 -
 -# If false, no module index is generated.
 -#
 -# texinfo_domain_indices = True
 -
 -# How to display URL addresses: 'footnote', 'no', or 'inline'.
 -#
 -# texinfo_show_urls = 'footnote'
 -
 -# If true, do not generate a @detailmenu in the "Top" node's menu.
 -#
 -# texinfo_no_detailmenu = False
++# -*- coding: utf-8 -*-
++#
++# incubator-singa documentation build configuration file, created by
++# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
++#
++# This file is execfile()d with the current directory set to its
++# containing dir.
++#
++# Note that not all possible configuration values are present in this
++# autogenerated file.
++#
++# All configuration values have a default; values that are commented out
++# serve to show the default.
++
++# If extensions (or modules to document with autodoc) are in another directory,
++# add these directories to sys.path here. If the directory is relative to the
++# documentation root, use os.path.abspath to make it absolute, like shown here.
++#
++import os
++import sys
++sys.path.insert(0, os.path.abspath('.'))
++sys.path.insert(1, os.path.abspath('../build/python'))
++
++# -- General configuration ------------------------------------------------
++from recommonmark.parser import CommonMarkParser
++
++source_parsers = {
++    '.md': CommonMarkParser,
++}
++
++# If your documentation needs a minimal Sphinx version, state it here.
++#
++# needs_sphinx = '1.0'
++
++# Add any Sphinx extension module names here, as strings. They can be
++# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
++# ones.
++extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
++napoleon_google_docstring = True
++
++# Add any paths that contain templates here, relative to this directory.
++templates_path = ['_templates']
++
++# The suffix(es) of source filenames.
++# You can specify multiple suffix as a list of string:
++#
++# source_suffix = ['.rst', '.md']
++source_suffix = ['.rst', '.md']
++
++# The encoding of source files.
++#
++source_encoding = 'utf-8-sig'
++
++# The master toctree document.
++master_doc = 'index'
++
++# General information about the project.
++project = u'incubator-singa'
++copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
++author = u'moaz'
++
++# The version info for the project you're documenting, acts as replacement for
++# |version| and |release|, also used in various other places throughout the
++# built documents.
++#
++# The short X.Y version.
++version = u'1.0.0'
++# The full version, including alpha/beta/rc tags.
++release = u'1.0.0'
++
++# The language for content autogenerated by Sphinx. Refer to documentation
++# for a list of supported languages.
++#
++# This is also used if you do content translation via gettext catalogs.
++# Usually you set "language" from the command line for these cases.
++language = None
++
++# There are two options for replacing |today|: either, you set today to some
++# non-false value, then it is used:
++#
++# today = ''
++#
++# Else, today_fmt is used as the format for a strftime call.
++#
++# today_fmt = '%B %d, %Y'
++
++# List of patterns, relative to source directory, that match files and
++# directories to ignore when looking for source files.
++# This patterns also effect to html_static_path and html_extra_path
++exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
++
++# The reST default role (used for this markup: `text`) to use for all
++# documents.
++#
++# default_role = None
++
++# If true, '()' will be appended to :func: etc. cross-reference text.
++#
++# add_function_parentheses = True
++
++# If true, the current module name will be prepended to all description
++# unit titles (such as .. function::).
++#
++# add_module_names = True
++
++# If true, sectionauthor and moduleauthor directives will be shown in the
++# output. They are ignored by default.
++#
++# show_authors = False
++
++# The name of the Pygments (syntax highlighting) style to use.
++pygments_style = 'sphinx'
++
++# A list of ignored prefixes for module index sorting.
++# modindex_common_prefix = []
++
++# If true, keep warnings as "system message" paragraphs in the built documents.
++# keep_warnings = False
++
++# If true, `todo` and `todoList` produce output, else they produce nothing.
++todo_include_todos = False
++
++
++# -- Options for HTML output ----------------------------------------------
++
++# The theme to use for HTML and HTML Help pages.  See the documentation for
++# a list of builtin themes.
++#
++html_theme = 'sphinx_rtd_theme'
++
++# Theme options are theme-specific and customize the look and feel of a theme
++# further.  For a list of options available for each theme, see the
++# documentation.
++#
++# html_theme_options = {}
++
++# Add any paths that contain custom themes here, relative to this directory.
++# html_theme_path = []
++
++# The name for this set of Sphinx documents.
++# "<project> v<release> documentation" by default.
++#
++# html_title = u'Singa v1.0.0'
++
++# A shorter title for the navigation bar.  Default is the same as html_title.
++#
++# html_short_title = None
++
++# The name of an image file (relative to this directory) to place at the top
++# of the sidebar.
++#
++html_logo = 'image/singa.png'
++
++# The name of an image file (relative to this directory) to use as a favicon of
++# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
++# pixels large.
++#
++# html_favicon = None
++
++# Add any paths that contain custom static files (such as style sheets) here,
++# relative to this directory. They are copied after the builtin static files,
++# so a file named "default.css" will overwrite the builtin "default.css".
++html_static_path = ['../_static']
++
++# Add any extra paths that contain custom files (such as robots.txt or
++# .htaccess) here, relative to this directory. These files are copied
++# directly to the root of the documentation.
++#
++# html_extra_path = []
++
++# If not None, a 'Last updated on:' timestamp is inserted at every page
++# bottom, using the given strftime format.
++# The empty string is equivalent to '%b %d, %Y'.
++#
++# html_last_updated_fmt = None
++
++# If true, SmartyPants will be used to convert quotes and dashes to
++# typographically correct entities.
++#
++# html_use_smartypants = True
++
++# Custom sidebar templates, maps document names to template names.
++#
++# html_sidebars = {}
++
++# Additional templates that should be rendered to pages, maps page names to
++# template names.
++#
++# html_additional_pages = {}
++
++# If false, no module index is generated.
++#
++# html_domain_indices = True
++
++# If false, no index is generated.
++#
++# html_use_index = True
++
++# If true, the index is split into individual pages for each letter.
++#
++# html_split_index = False
++
++# If true, links to the reST sources are added to the pages.
++#
++html_show_sourcelink = False
++
++# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
++#
++# html_show_sphinx = True
++
++# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
++#
++# html_show_copyright = True
++
++# If true, an OpenSearch description file will be output, and all pages will
++# contain a <link> tag referring to it.  The value of this option must be the
++# base URL from which the finished HTML is served.
++#
++# html_use_opensearch = ''
++
++# This is the file name suffix for HTML files (e.g. ".xhtml").
++# html_file_suffix = None
++
++# Language to be used for generating the HTML full-text search index.
++# Sphinx supports the following languages:
++#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
++#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
++#
++# html_search_language = 'en'
++
++# A dictionary with options for the search language support, empty by default.
++# 'ja' uses this config value.
++# 'zh' user can custom change `jieba` dictionary path.
++#
++# html_search_options = {'type': 'default'}
++
++# The name of a javascript file (relative to the configuration directory) that
++# implements a search results scorer. If empty, the default will be used.
++#
++# html_search_scorer = 'scorer.js'
++
++# Output file base name for HTML help builder.
++htmlhelp_basename = 'Singadoc'
++
++# -- Options for LaTeX output ---------------------------------------------
++
++latex_elements = {
++     # The paper size ('letterpaper' or 'a4paper').
++     #
++     # 'papersize': 'letterpaper',
++
++     # The font size ('10pt', '11pt' or '12pt').
++     #
++     # 'pointsize': '10pt',
++
++     # Additional stuff for the LaTeX preamble.
++     #
++     # 'preamble': '',
++
++     # Latex figure (float) alignment
++     #
++     # 'figure_align': 'htbp',
++}
++
++# Grouping the document tree into LaTeX files. List of tuples
++# (source start file, target name, title,
++#  author, documentclass [howto, manual, or own class]).
++latex_documents = [
++    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
++     u'moaz', 'manual'),
++]
++
++# The name of an image file (relative to this directory) to place at the top of
++# the title page.
++#
++# latex_logo = None
++
++# For "manual" documents, if this is true, then toplevel headings are parts,
++# not chapters.
++#
++# latex_use_parts = False
++
++# If true, show page references after internal links.
++#
++# latex_show_pagerefs = False
++
++# If true, show URL addresses after external links.
++#
++# latex_show_urls = False
++
++# Documents to append as an appendix to all manuals.
++#
++# latex_appendices = []
++
++# If false, no module index is generated.
++#
++# latex_domain_indices = True
++
++
++# -- Options for manual page output ---------------------------------------
++
++# One entry per manual page. List of tuples
++# (source start file, name, description, authors, manual section).
++man_pages = [
++    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
++     [author], 1)
++]
++
++# If true, show URL addresses after external links.
++#
++# man_show_urls = False
++
++
++# -- Options for Texinfo output -------------------------------------------
++
++# Grouping the document tree into Texinfo files. List of tuples
++# (source start file, target name, title, author,
++#  dir menu entry, description, category)
++texinfo_documents = [
++    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
++     author, 'incubator-singa', 'One line description of project.',
++     'Miscellaneous'),
++]
++
++# Documents to append as an appendix to all manuals.
++#
++# texinfo_appendices = []
++
++# If false, no module index is generated.
++#
++# texinfo_domain_indices = True
++
++# How to display URL addresses: 'footnote', 'no', or 'inline'.
++#
++# texinfo_show_urls = 'footnote'
++
++# If true, do not generate a @detailmenu in the "Top" node's menu.
++#
++# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/doc/en/docs/installation.md
----------------------------------------------------------------------
diff --cc doc/en/docs/installation.md
index 0000000,8ab617f..5d3c8a2
mode 000000,100755..100755
--- a/doc/en/docs/installation.md
+++ b/doc/en/docs/installation.md
@@@ -1,0 -1,69 +1,226 @@@
 -# Building SINGA from source
 -
 -## Dependencies
 -
 -### Required
 -* Google Protobuf (>=2.5)
 -* BLAS (tested with OpenBLAS >=0.2.10)
 -* CUDA (tested with 6.5, 7.0 and 7.5)
 -* CUDNN (v4 and v5)
 -* cmake (>=2.6)
 -
 -Users must install the above mandatory libraries.
 -Currently CUDA and CUDNN are also mandatory, but it would become optional later.
 -
 -### Optional
 -* Glog
 -* OpenCV (tested with 2.4.8)
 -* LMDB (tested with 0.9)
 -
 -
 -## Instructions
 -
 -Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
 -
 -
 -    $ git clone https://github.com/apache/incubator-singa.git
 -    $ cd incubator-singa/
 -    # switch to dev branch
 -    $ git checkout dev
 -
 -
 -If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
 -which could be downloaded as
 -
 -    $ git submodule init
 -    $ git submodule update
 -
 -
 -### Linux OS
 -
 -GCC (>=4.8.1) is required to compile SINGA on Linux OS.
 -In SINGA_ROOT, execute the following commands for compiling SINGA,
 -
 -    $ mkdir build && cd build
 -    # generate Makefile for compilation
 -    $ cmake ..
 -    # compile SINGA
 -    $ make
 -
 -Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
 -
 -    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
 -    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
 -
 -You can use `ccmake ..` to configure the compilation options including using
 -LMDB, GLOG, etc.
 -
 -After compiling SINGA, you can run the unit tests by
 -
 -    $ ./bin/test_singa
 -
 -You can see all the testing cases with testing results. If SINGA passes all
 -tests, then you have successfully installed SINGA. Please proceed to try the examples!
 -
 -
 -### MacOS
 -
 -
 -### Windows
++# Installation
++
++## Dependencies
++
++### Required
++* google protobuf (>=2.5,<3)
++* blas (tested with openblas >=0.2.10)
++* cmake (>=2.6)
++
++
++### Optional
++* glog
++* opencv (tested with 2.4.8)
++* lmdb (tested with 0.9)
++* cuda (tested with 6.5, 7.0 and 7.5)
++* cudnn (v4 and v5)
++
++PySINGA has additional dependencies
++
++* python(==2.7)
++* pip(>=1.5)
++* swig(>=3.0)
++* numpy(>=1.11.0)
++* openblas (>=0.2.10)
++
++Users are encouraged to install the cuda and [cudnn](https://developer.nvidia.com/cudnn) for running SINGA on GPUs to
++get better performance.
++Most of the dependent libraries could be installed via package mangers like
++apt-get or homebrew.
++
++    # for ubuntu users, tested on 14.04
++    sudo apt-get install libprotobuf-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev, python2.7-dev, python-pip, python-numpy
++
++    # for Mac OS users
++    brew install -vd glog lmdb
++    brew tap homebrew/science
++    brew install opencv
++    brew install openblas
++    brew tap homebrew/python
++    brew install python
++    brew install numpy  --with-openblas
++
++
++## Install PySINGA
++
++### From wheel
++
++After installing the dependencies for SINGA and PySINGA, please download the correct binary:
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++Then, run the following command
++
++    $ sudo pip install --upgrade $SINGA_WHEEL_URL
++
++If you do not have sudo right, you can run `pip install` in a python virtual environment.
++
++
++### From source
++
++Please compile SINGA from source (see the next section) with the 'USE_PYTHON' option on,
++and then run the following commands,
++
++    # under the build directory
++    $ cd python
++    $ sudo pip install .
++
++If you are using a virtual environment, you can ignore the `sudo` keyword.
++
++Developers can build the wheel file via
++
++    # under the build directory
++    $ cd python
++    $ python setup.py bdist_wheel
++
++
++The generated wheel file is under "dist" directory
++
++
++## Build SINGA from source
++
++Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
++
++    $ git clone https://github.com/apache/incubator-singa.git
++    $ cd incubator-singa/
++
++If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
++which could be downloaded as
++
++    $ git submodule init
++    $ git submodule update
++
++
++### Linux & MacOS
++
++GCC (>=4.8.1) is required to compile SINGA on Linux.
++For Mac OS users, you can use either GCC or Clang.
++
++In SINGA_ROOT, execute the following commands for compiling SINGA,
++
++    $ mkdir build && cd build
++    $ cmake ..
++    $ make
++    $ make install
++
++Note that if you are using CUDNN and it is not installed under system default
++folder, you need to let cmake know the paths to CUDNN,
++
++    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
++    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
++
++You can use `ccmake ..` to configure the compilation options including using
++generating python binding and changing the installation folder.
++If the dependent libraries are not in the system default paths, you need to export
++the following environment variables
++
++    export CMAKE_INCLUDE_PATH=<path to your header file folder>
++    export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++After compiling SINGA, you can run the unit tests by
++
++    $ ./bin/test_singa
++
++You can see all the testing cases with testing results. If SINGA passes all
++tests, then you have successfully installed SINGA. Please proceed to try the examples!
++
++
++### Windows
++To be added.
++
++
++## FAQ
++
++* Q: Error from running `cmake ..`, which cannot find the dependent libraries.
++
++    A: If you haven't installed the libraries, please install them. If you installed
++    the libraries in a folder that is outside of the system folder, e.g. /usr/local,
++    please export the following variables
++
++        export CMAKE_INCLUDE_PATH=<path to your header file folder>
++        export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from `make`, e.g. the linking phase
++
++    A: If your libraries are in other folders than system default paths, you need
++    to export the following varaibles
++
++    $ export LIBRARY_PATH=<path to your lib file folder>
++    $ export LD_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from header files, e.g. 'cblas.h no such file or directory exists'
++
++    A: You need to include the folder of the cblas.h into CPLUS_INCLUDE_PATH,
++    e.g.,
++
++        $ export CPLUS_INCLUDE_PATH=/opt/OpenBLAS/include:$CPLUS_INCLUDE_PATH
++
++* Q:While compiling SINGA, I get error `SSE2 instruction set not enabled`
++
++    A:You can try following command:
++
++        $ make CFLAGS='-msse2' CXXFLAGS='-msse2'
++
++* Q:I get `ImportError: cannot import name enum_type_wrapper` from google.protobuf.internal when I try to import .py files.
++
++    A: You need to install the python binding of protobuf, which could be installed via
++
++        $ sudo apt-get install protobuf
++
++    or from source
++
++        $ cd /PROTOBUF/SOURCE/FOLDER
++        $ cd python
++        $ python setup.py build
++        $ python setup.py install
++
++* Q: When I build OpenBLAS from source, I am told that I need a Fortran compiler.
++
++    A: You can compile OpenBLAS by
++
++        $ make ONLY_CBLAS=1
++
++    or install it using
++
++        $ sudo apt-get install libopenblas-dev
++
++* Q: When I build protocol buffer, it reports that GLIBC++_3.4.20 not found in /usr/lib64/libstdc++.so.6.
++
++    A9: This means the linker found libstdc++.so.6 but that library
++    belongs to an older version of GCC than was used to compile and link the
++    program. The program depends on code defined in
++    the newer libstdc++ that belongs to the newer version of GCC, so the linker
++    must be told how to find the newer libstdc++ shared library.
++    The simplest way to fix this is to find the correct libstdc++ and export it to
++    LD_LIBRARY_PATH. For example, if GLIBC++_3.4.20 is listed in the output of the
++    following command,
++
++        $ strings /usr/local/lib64/libstdc++.so.6|grep GLIBC++
++
++    then you just set your environment variable as
++
++        $ export LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH
++
++* Q: When I build glog, it reports that "src/logging_unittest.cc:83:20: error: \u2018gflags\u2019 is not a namespace-name"
++
++    A: It maybe that you have installed gflags with a different namespace such as "google". so glog can't find 'gflags' namespace.
++    Because it is not necessary to have gflags to build glog. So you can change the configure.ac file to ignore gflags.
++
++        1. cd to glog src directory
++        2. change line 125 of configure.ac  to "AC_CHECK_LIB(gflags, main, ac_cv_have_libgflags=0, ac_cv_have_libgflags=0)"
++        3. autoreconf
++
++    After this, you can build glog again.
++

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/src/python/singa/device.py
----------------------------------------------------------------------
diff --cc src/python/singa/device.py
index 65824c2,eff6783..2d93823
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@@ -114,9 -114,6 +114,10 @@@ def create_cuda_gpu_on(device_id)
      return devices[0]
  
  
 +default_device = singa.Platform.GetDefaultDevice()
 +
 +
  def get_default_device():
      '''Get the default host device which is a CppCPU device'''
 -    return singa.Platform.GetDefaultDevice()
 +    return default_device
++

[46/51] [abbrv] incubator-singa git commit: SINGA-240 Add license for singa source

Posted by wa...@apache.org.

SINGA-240 Add license for singa source

Update license.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/1c5ca229
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/1c5ca229
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/1c5ca229

Branch: refs/heads/master
Commit: 1c5ca2299f299babf1376664ad1b76e694964243
Parents: b3566e4
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Wed Aug 17 13:15:34 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 18 01:31:36 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                    |   20 +
 cmake/Cuda.cmake                  |   20 +
 cmake/Dependencies.cmake          |   20 +
 cmake/Protobuf.cmake              |    5 +-
 cmake/Templates/singa_config.h.in |   20 +
 cmake/Thirdparty/FindCBLAS.cmake  |   20 +
 cmake/Thirdparty/FindCUDNN.cmake  |   20 +
 cmake/Thirdparty/FindGlog.cmake   |   20 +
 cmake/Thirdparty/FindLMDB.cmake   |   20 +
 cmake/Utils.cmake                 |   22 +-
 examples/CMakeLists.txt           |   20 +
 examples/cifar10/CMakeLists.txt   |   20 +
 examples/cifar10/download_data.py |   20 +
 examples/cifar10/run-parallel.sh  |   20 +
 examples/cifar10/run.sh           |   20 +
 examples/imagenet/CMakeLists.txt  |   20 +
 examples/imagenet/create_data.sh  |   20 +
 examples/imagenet/run.sh          |   20 +
 examples/index.rst                |   20 +
 include/singa/utils/timer.h       |   20 +
 rat_check                         | 1174 +++++++-------------------------
 src/CMakeLists.txt                |   22 +-
 src/python/setup.py.in            |   20 +
 src/python/singa/__init__.py      |   21 +
 test/CMakeLists.txt               |   20 +
 test/gtest/CMakeLists.txt         |   21 +
 test/python/test_layer.py         |   20 +
 test/singa/test_layer.cc          |   20 +
 test/singa/test_tensor.cc         |   20 +
 test/singa/test_tensor_math.cc    |   20 +
 test/singa/test_timer.cc          |   20 +
 31 files changed, 851 insertions(+), 914 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6652cea..5646870 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
 
 PROJECT(singa)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Cuda.cmake
----------------------------------------------------------------------
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
index e7af7c9..37d5878 100644
--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 
 FIND_PACKAGE(CUDA 5.5 QUIET)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index c03c81e..62c7d52 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 SET(SINGA_LINKER_LIBS "")
 
 #INCLUDE("cmake/ProtoBuf.cmake")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Protobuf.cmake
----------------------------------------------------------------------
diff --git a/cmake/Protobuf.cmake b/cmake/Protobuf.cmake
index 9324749..70cf0fe 100644
--- a/cmake/Protobuf.cmake
+++ b/cmake/Protobuf.cmake
@@ -1,4 +1,7 @@
-# copy from cmake source code
+# This script is taken from
+# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
+# and modified to our compilation.
+
 function(PROTOBUF_GENERATE_PYTHON OUTPUT)
     if(NOT ARGN)
         message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Templates/singa_config.h.in
----------------------------------------------------------------------
diff --git a/cmake/Templates/singa_config.h.in b/cmake/Templates/singa_config.h.in
index 0211f09..011489c 100644
--- a/cmake/Templates/singa_config.h.in
+++ b/cmake/Templates/singa_config.h.in
@@ -1,3 +1,23 @@
+/**
+ * Copyright 2015 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
 // Source directory
 #define SOURCE_FOLDER "${PROJECT_SOURCE_DIR}"
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Thirdparty/FindCBLAS.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindCBLAS.cmake b/cmake/Thirdparty/FindCBLAS.cmake
index 413d1c1..6e9ce5f 100644
--- a/cmake/Thirdparty/FindCBLAS.cmake
+++ b/cmake/Thirdparty/FindCBLAS.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 
 FIND_PATH(CBLAS_INCLUDE_DIR NAMES cblas.h PATHS "$ENV{CBLAS_DIR}/include")
 FIND_LIBRARY(CBLAS_LIBRARIES NAMES openblas PATHS "$ENV{CBLAS_DIR}/lib")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Thirdparty/FindCUDNN.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindCUDNN.cmake b/cmake/Thirdparty/FindCUDNN.cmake
index 32b927b..7792f58 100644
--- a/cmake/Thirdparty/FindCUDNN.cmake
+++ b/cmake/Thirdparty/FindCUDNN.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 
 FIND_PATH(CUDNN_INCLUDE_DIR NAME "cudnn.h" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
 FIND_LIBRARY(CUDNN_LIBRARIES NAME "libcudnn.so" PATHS "$ENV{CMAKE_LIBRARY_PATH}")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Thirdparty/FindGlog.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindGlog.cmake b/cmake/Thirdparty/FindGlog.cmake
index c0fdf83..c0e857a 100644
--- a/cmake/Thirdparty/FindGlog.cmake
+++ b/cmake/Thirdparty/FindGlog.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 
 FIND_PATH(GLOG_INCLUDE_DIR NAMES glog/logging.h PATHS "$ENV{GLOG_DIR}/include")
 FIND_LIBRARY(GLOG_LIBRARIES NAMES glog)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Thirdparty/FindLMDB.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindLMDB.cmake b/cmake/Thirdparty/FindLMDB.cmake
index cf45e00..fbe34fc 100644
--- a/cmake/Thirdparty/FindLMDB.cmake
+++ b/cmake/Thirdparty/FindLMDB.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 
 FIND_PATH(LMDB_INCLUDE_DIR NAMES lmdb.h PATHS "$ENV{LMDB_DIR}/include")
 FIND_LIBRARY(LMDB_LIBRARIES NAMES lmdb PATHS "$ENV{LMDB_DIR}/include")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/cmake/Utils.cmake
----------------------------------------------------------------------
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 73feabc..49494f0 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 
 macro(swig_generate_cxx pylist_variable)
     if(NOT EXISTS "${CMKAE_BINARY_DIR}/python")
@@ -49,4 +69,4 @@ function (create_symlinks)
         endif()
 
     endforeach(path_file)
-endfunction(create_symlinks)
\ No newline at end of file
+endfunction(create_symlinks)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 6014f27..bf27685 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,2 +1,22 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 ADD_SUBDIRECTORY(cifar10)
 ADD_SUBDIRECTORY(imagenet)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/cifar10/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/cifar10/CMakeLists.txt b/examples/cifar10/CMakeLists.txt
index 76c0b73..6814958 100644
--- a/examples/cifar10/CMakeLists.txt
+++ b/examples/cifar10/CMakeLists.txt
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/cifar10/download_data.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/download_data.py b/examples/cifar10/download_data.py
index ce0ee4f..8153030 100755
--- a/examples/cifar10/download_data.py
+++ b/examples/cifar10/download_data.py
@@ -1,4 +1,24 @@
 #!/usr/bin/env python
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 import urllib
 import tarfile
 import os

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/cifar10/run-parallel.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run-parallel.sh b/examples/cifar10/run-parallel.sh
index 18193db..e4f0221 100755
--- a/examples/cifar10/run-parallel.sh
+++ b/examples/cifar10/run-parallel.sh
@@ -1,3 +1,23 @@
 #!/usr/bin/env sh
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 ../../build/bin/alexnet-parallel -epoch 4
 #../../build/bin/vgg-parallel -epoch 4

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/cifar10/run.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run.sh b/examples/cifar10/run.sh
index c01ec18..a544234 100755
--- a/examples/cifar10/run.sh
+++ b/examples/cifar10/run.sh
@@ -1,2 +1,22 @@
 #!/usr/bin/env sh
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 ../../build/bin/alexnet -epoch 140

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/imagenet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/imagenet/CMakeLists.txt b/examples/imagenet/CMakeLists.txt
index 71fbbb1..9675f4c 100644
--- a/examples/imagenet/CMakeLists.txt
+++ b/examples/imagenet/CMakeLists.txt
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/imagenet/create_data.sh
----------------------------------------------------------------------
diff --git a/examples/imagenet/create_data.sh b/examples/imagenet/create_data.sh
index dd3d9b8..6a9eea9 100755
--- a/examples/imagenet/create_data.sh
+++ b/examples/imagenet/create_data.sh
@@ -1,3 +1,23 @@
 #!/usr/bin/env sh
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 ../../build/bin/createdata -trainlist "imagenet/label/train.txt" -trainfolder "imagenet/ILSVRC2012_img_train" \
   -testlist "imagenet/label/val.txt" -testfolder "imagenet/ILSVRC2012_img_val" -outdata "imagenet_data" -filesize 1280

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/imagenet/run.sh
----------------------------------------------------------------------
diff --git a/examples/imagenet/run.sh b/examples/imagenet/run.sh
index 5c27b5c..aeffba3 100755
--- a/examples/imagenet/run.sh
+++ b/examples/imagenet/run.sh
@@ -1,3 +1,23 @@
 #!/usr/bin/env sh
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 ../../build/bin/imagenet -epoch 90 -lr 0.01 -batchsize 256 -filesize 1280 -ntrain 1281167 -ntest 50000 \
   -data "imagenet_data" -pfreq 100 -nthreads 12

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index 4bb5b49..8df22fd 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,3 +1,23 @@
+.. 
+.. Copyright 2015 The Apache Software Foundation
+.. 
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+.. 
+..     http://www.apache.org/licenses/LICENSE-2.0
+.. 
+.. Unless required by applicable law or agreed to in writing, software
+.. distributed under the License is distributed on an "AS IS" BASIS,
+.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+.. See the License for the specific language governing permissions and
+.. limitations under the License.
+.. 
+
 Examples
 ========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/include/singa/utils/timer.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/timer.h b/include/singa/utils/timer.h
index 1372d3c..f54bce8 100644
--- a/include/singa/utils/timer.h
+++ b/include/singa/utils/timer.h
@@ -1,3 +1,23 @@
+/**
+ * Copyright 2015 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #ifndef SINGA_UTILS_TIMER_H
 #define SINGA_UTILS_TIMER_H
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/rat_check
----------------------------------------------------------------------
diff --git a/rat_check b/rat_check
index 1fcfd7d..77f0a63 100644
--- a/rat_check
+++ b/rat_check
@@ -2,19 +2,19 @@
 *****************************************************
 Summary
 -------
-Generated at: 2016-08-16T12:50:50+08:00
-Notes: 4
+Generated at: 2016-08-17T13:11:07+08:00
+Notes: 5
 Binaries: 0
 Archives: 0
-Standards: 282
+Standards: 288
 
-Apache Licensed: 240
+Apache Licensed: 269
 Generated Documents: 0
 
 JavaDocs are generated and so license header is optional
 Generated files do not required license headers
 
-42 Unknown Licenses
+18 Unknown Licenses
 
 *******************************
 
@@ -22,45 +22,21 @@ Unapproved licenses:
 
   ./.gitmodules
   ./.travis.yml
-  ./CMakeLists.txt
   ./rat_check
-  ./cmake/Cuda.cmake
-  ./cmake/Dependencies.cmake
   ./cmake/Protobuf.cmake
-  ./cmake/Utils.cmake
-  ./cmake/Templates/singa_config.h.in
-  ./cmake/Thirdparty/FindCBLAS.cmake
-  ./cmake/Thirdparty/FindCUDNN.cmake
-  ./cmake/Thirdparty/FindGlog.cmake
-  ./cmake/Thirdparty/FindLMDB.cmake
   ./cmake/Thirdparty/FindOpenCL.cmake
-  ./examples/CMakeLists.txt
-  ./examples/index.rst
-  ./examples/cifar10/CMakeLists.txt
-  ./examples/cifar10/download_data.py
-  ./examples/cifar10/run-parallel.sh
-  ./examples/cifar10/run.sh
-  ./examples/imagenet/CMakeLists.txt
-  ./examples/imagenet/create_data.sh
-  ./examples/imagenet/run.sh
   ./include/singa/utils/cuda_utils.h
-  ./include/singa/utils/timer.h
   ./include/singa/utils/tinydir.h
-  ./src/CMakeLists.txt
+  ./lib/cnmem/.git
+  ./lib/cnmem/CMakeLists.txt
+  ./lib/cnmem/include/cnmem.h
+  ./lib/cnmem/src/cnmem.cpp
+  ./lib/cnmem/tests/cnmem_tests.cpp
   ./src/core/tensor/distribution.cl
-  ./src/python/setup.py.in
-  ./src/python/singa/__init__.py
   ./src/python/swig/numpy.i
-  ./test/CMakeLists.txt
-  ./test/gtest/CMakeLists.txt
   ./test/gtest/gtest-all.cc
   ./test/gtest/gtest.h
   ./test/gtest/gtest_main.cc
-  ./test/python/test_layer.py
-  ./test/singa/test_layer.cc
-  ./test/singa/test_tensor.cc
-  ./test/singa/test_tensor_math.cc
-  ./test/singa/test_timer.cc
   ./tool/cpplint.py
 
 *******************************
@@ -74,7 +50,7 @@ Archives:
   Notices, licenses etc will be marked N
  !????? ./.gitmodules
  !????? ./.travis.yml
- !????? ./CMakeLists.txt
+  AL    ./CMakeLists.txt
   N     ./DISCLAIMER
   N     ./LICENSE
   N     ./NOTICE
@@ -87,38 +63,38 @@ Archives:
   AL    ./bin/singa-run.sh
   AL    ./bin/singa-stop.sh
   AL    ./bin/zk-service.sh
- !????? ./cmake/Cuda.cmake
- !????? ./cmake/Dependencies.cmake
+  AL    ./cmake/Cuda.cmake
+  AL    ./cmake/Dependencies.cmake
  !????? ./cmake/Protobuf.cmake
- !????? ./cmake/Utils.cmake
- !????? ./cmake/Templates/singa_config.h.in
- !????? ./cmake/Thirdparty/FindCBLAS.cmake
- !????? ./cmake/Thirdparty/FindCUDNN.cmake
- !????? ./cmake/Thirdparty/FindGlog.cmake
- !????? ./cmake/Thirdparty/FindLMDB.cmake
+  AL    ./cmake/Utils.cmake
+  AL    ./cmake/Templates/singa_config.h.in
+  AL    ./cmake/Thirdparty/FindCBLAS.cmake
+  AL    ./cmake/Thirdparty/FindCUDNN.cmake
+  AL    ./cmake/Thirdparty/FindGlog.cmake
+  AL    ./cmake/Thirdparty/FindLMDB.cmake
  !????? ./cmake/Thirdparty/FindOpenCL.cmake
- !????? ./examples/CMakeLists.txt
- !????? ./examples/index.rst
+  AL    ./examples/CMakeLists.txt
+  AL    ./examples/index.rst
   AL    ./examples/char-rnn/sample.py
   AL    ./examples/char-rnn/train.py
- !????? ./examples/cifar10/CMakeLists.txt
+  AL    ./examples/cifar10/CMakeLists.txt
   AL    ./examples/cifar10/alexnet-parallel.cc
   AL    ./examples/cifar10/alexnet.cc
   AL    ./examples/cifar10/alexnet.py
   AL    ./examples/cifar10/cifar10.h
- !????? ./examples/cifar10/download_data.py
+  AL    ./examples/cifar10/download_data.py
   AL    ./examples/cifar10/predict.py
- !????? ./examples/cifar10/run-parallel.sh
- !????? ./examples/cifar10/run.sh
+  AL    ./examples/cifar10/run-parallel.sh
+  AL    ./examples/cifar10/run.sh
   AL    ./examples/cifar10/train.py
   AL    ./examples/cifar10/vgg-parallel.cc
   AL    ./examples/cifar10/vgg.py
- !????? ./examples/imagenet/CMakeLists.txt
+  AL    ./examples/imagenet/CMakeLists.txt
   AL    ./examples/imagenet/alexnet.cc
- !????? ./examples/imagenet/create_data.sh
+  AL    ./examples/imagenet/create_data.sh
   AL    ./examples/imagenet/ilsvrc12.cc
   AL    ./examples/imagenet/ilsvrc12.h
- !????? ./examples/imagenet/run.sh
+  AL    ./examples/imagenet/run.sh
   AL    ./examples/mnist/train.py
   AL    ./include/singa/core/common.h
   AL    ./include/singa/core/device.h
@@ -149,9 +125,15 @@ Archives:
   AL    ./include/singa/utils/safe_queue.h
   AL    ./include/singa/utils/singleton.h
   AL    ./include/singa/utils/string.h
- !????? ./include/singa/utils/timer.h
+  AL    ./include/singa/utils/timer.h
  !????? ./include/singa/utils/tinydir.h
- !????? ./src/CMakeLists.txt
+ !????? ./lib/cnmem/.git
+ !????? ./lib/cnmem/CMakeLists.txt
+  N     ./lib/cnmem/LICENSE
+ !????? ./lib/cnmem/include/cnmem.h
+ !????? ./lib/cnmem/src/cnmem.cpp
+ !????? ./lib/cnmem/tests/cnmem_tests.cpp
+  AL    ./src/CMakeLists.txt
   AL    ./src/core/device/cpp_cpu.cc
   AL    ./src/core/device/cuda_gpu.cc
   AL    ./src/core/device/device.cc
@@ -238,8 +220,8 @@ Archives:
   AL    ./src/proto/core.proto
   AL    ./src/proto/io.proto
   AL    ./src/proto/model.proto
- !????? ./src/python/setup.py.in
- !????? ./src/python/singa/__init__.py
+  AL    ./src/python/setup.py.in
+  AL    ./src/python/singa/__init__.py
   AL    ./src/python/singa/command.py
   AL    ./src/python/singa/device.py
   AL    ./src/python/singa/initializer.py
@@ -259,15 +241,16 @@ Archives:
   AL    ./src/python/swig/model_optimizer.i
  !????? ./src/python/swig/numpy.i
   AL    ./src/python/swig/singa.i
+   ./src/python/swig/singa_wrap.cxx
   AL    ./src/utils/channel.cc
   AL    ./src/utils/logging.cc
   AL    ./src/utils/opencl_utils.cc
- !????? ./test/CMakeLists.txt
- !????? ./test/gtest/CMakeLists.txt
+  AL    ./test/CMakeLists.txt
+  AL    ./test/gtest/CMakeLists.txt
  !????? ./test/gtest/gtest-all.cc
  !????? ./test/gtest/gtest.h
  !????? ./test/gtest/gtest_main.cc
- !????? ./test/python/test_layer.py
+  AL    ./test/python/test_layer.py
   AL    ./test/python/test_optimizer.py
   AL    ./test/python/test_tensor.py
   AL    ./test/singa/test_accuracy.cc
@@ -295,7 +278,7 @@ Archives:
   AL    ./test/singa/test_image_transformer.cc
   AL    ./test/singa/test_initializer.cc
   AL    ./test/singa/test_jpg.cc
- !????? ./test/singa/test_layer.cc
+  AL    ./test/singa/test_layer.cc
   AL    ./test/singa/test_lmdb_rw.cc
   AL    ./test/singa/test_logging.cc
   AL    ./test/singa/test_lrn.cc
@@ -310,10 +293,10 @@ Archives:
   AL    ./test/singa/test_sgd.cc
   AL    ./test/singa/test_snapshot.cc
   AL    ./test/singa/test_softmax.cc
- !????? ./test/singa/test_tensor.cc
- !????? ./test/singa/test_tensor_math.cc
+  AL    ./test/singa/test_tensor.cc
+  AL    ./test/singa/test_tensor_math.cc
   AL    ./test/singa/test_textfile_rw.cc
- !????? ./test/singa/test_timer.cc
+  AL    ./test/singa/test_timer.cc
   AL    ./thirdparty/install.sh
  !????? ./tool/cpplint.py
   AL    ./tool/graph.py
@@ -396,149 +379,16 @@ script:
 
 
 =======================================================================
-==./CMakeLists.txt
-=======================================================================
-CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
-
-PROJECT(singa)
-SET(PACKAGE_VERSION "1.0.0")
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g -O2 ")
-
-LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
-#message(STATUS "module path: ${CMAKE_MODULE_PATH}")
-
-# Flags
-IF(UNIX OR APPLE)
-  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall")
-ENDIF()
-IF(CMAKE_BUILD_TYPE=Debug)
-  SET(NVCC_FLAG "${NVCC_FLAG} -g -G ")
-ENDIF()
-#message(STATUS "${CMAKE_CXX_FLAGS}")
-SET(SINGA_INCLUDE_DIR
-    "${CMAKE_SOURCE_DIR}/include;${CMAKE_SOURCE_DIR}/lib/cnmem/include;${PROJECT_BINARY_DIR}")
-INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
-
-OPTION(USE_CBLAS "Use CBlas libs" ON)
-OPTION(USE_CUDA "Use Cuda libs" ON)
-OPTION(USE_CUDNN "Use Cudnn libs" ON)
-OPTION(USE_OPENCV "Use opencv" OFF)
-OPTION(USE_LMDB "Use LMDB libs" OFF)
-OPTION(USE_PYTHON "Generate py wrappers" ON)
-OPTION(USE_OPENCL "Use OpenCL" OFF)
-OPTION(ENABLE_DIST "enable distributed training" OFF)
-#OPTION(BUILD_OPENCL_TESTS "Build OpenCL tests" OFF)
-
-INCLUDE("cmake/Dependencies.cmake")
-INCLUDE("cmake/Utils.cmake")
-ADD_DEFINITIONS(-DUSE_CMAKE)
-#message(STATUS "${SINGA_INCLUDE_DIR}")
-
-CONFIGURE_FILE (
-    "${PROJECT_SOURCE_DIR}/cmake/Templates/singa_config.h.in"
-    "${PROJECT_BINARY_DIR}/include/singa/singa_config.h")
-
-#set(SINGA_CONFIGURE_SRC "${PROJECT_BINARY_DIR}/singa_config.h")
-#LIST(APPEND SRCS ${SINGA_CONFIGURE_SRCS} ${PROJECT_BINARY_DIR}/singa_config.h)
-
-SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
-SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
-
-IF (USE_CUDA)
-    ADD_SUBDIRECTORY(lib/cnmem)
-    LIST(APPEND SINGA_LINKER_LIBS cnmem)
-ENDIF()
-
-=======================================================================
 ==./rat_check
 =======================================================================
 
 =======================================================================
-==./cmake/Cuda.cmake
-=======================================================================
-
-FIND_PACKAGE(CUDA 5.5 QUIET)
-
-IF(NOT CUDA_FOUND)
-    return()
-ENDIF()
-
-SET(HAVE_CUDA TRUE)
-MESSAGE(STATUS "Found cuda_v${CUDA_VERSION}")
-#ADD_DEFINITIONS(-DUSE_CUDA)
-#message(STATUS "linking: ${CUDA_CUDART_LIBRARY} ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}")
-
-IF(USE_CUDNN)
-#include(cmake/Modules/Cudnn.cmake)
-    FIND_PACKAGE(CUDNN REQUIRED)
-    INCLUDE_DIRECTORIES(SYSTEM ${CUDNN_INCLUDE_DIR})
-    LIST(APPEND SINGA_LINKER_LIBS ${CUDNN_LIBRARIES})
-    #ADD_DEFINITIONS(-DUSE_CUDNN)
-    #ADD_DEFINITIONS(-DCUDNN_VERSION_MAJOR=${CUDNN_VERSION_MAJOR})
-ENDIF()
-
-INCLUDE_DIRECTORIES(SYSTEM ${CUDA_INCLUDE_DIRS})
-LIST(APPEND SINGA_LINKER_LIBS ${CUDA_CUDART_LIBRARY} ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
-#MESSAGE(STATUS "libs " ${SINGA_LINKER_LIBS})
-
-=======================================================================
-==./cmake/Dependencies.cmake
-=======================================================================
-SET(SINGA_LINKER_LIBS "")
-
-#INCLUDE("cmake/ProtoBuf.cmake")
-
-FIND_PACKAGE( Protobuf REQUIRED )
-INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR})
-MESSAGE(STATUS "proto libs " ${PROTOBUF_LIBRARIES})
-LIST(APPEND SINGA_LINKER_LIBS ${PROTOBUF_LIBRARIES})
-INCLUDE("cmake/Protobuf.cmake")
-
-#FIND_PACKAGE(Glog)
-#IF(GLOG_FOUND)
-#    MESSAGE(STATUS "GLOG FOUND at ${GLOG_INCLUDE_DIR}")
-#    ADD_DEFINITIONS("-DUSE_GLOG")
-#    LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
-#ENDIF()
-
-IF(USE_LMDB)
-    FIND_PACKAGE(LMDB REQUIRED)
-    INCLUDE_DIRECTORIES(SYSTEM ${LMDB_INCLUDE_DIR})
-    LIST(APPEND SINGA_LINKER_LIBS ${LMDB_LIBRARIES})
-    MESSAGE(STATUS "FOUND lmdb at ${LMDB_INCLUDE_DIR}")
-ENDIF()
-
-IF(USE_CUDA)
-    INCLUDE("cmake/Cuda.cmake")
-ELSE()
-    SET(USE_CUDNN FALSE)
-ENDIF()
-
-IF(USE_CBLAS)
-    FIND_PACKAGE(CBLAS REQUIRED)
-    INCLUDE_DIRECTORIES(SYSTEM ${CBLAS_INCLUDE_DIR})
-    LIST(APPEND SINGA_LINKER_LIBS ${CBLAS_LIBRARIES})
-    MESSAGE(STATUS "FOUND cblas at ${CBLAS_LIBRARIES}")
-ENDIF()
-
-IF(USE_OPENCL)
-    FIND_PACKAGE(OpenCL REQUIRED)
-    IF(NOT OPENCL_FOUND)
-        MESSAGE(SEND_ERROR "OpenCL was requested, but not found.")
-    ELSE()
-        INCLUDE_DIRECTORIES(SYSTEM ${OpenCL_INCPATH})
-        LIST(APPEND SINGA_LINKER_LIBS ${OPENCL_LIBRARIES})
-        MESSAGE(STATUS "Found OpenCL at ${OPENCL_INCLUDE_DIRS}")
-        IF(NOT OPENCL_HAS_CPP_BINDINGS)
-            MESSAGE(SEND_ERROR "OpenCL C++ bindings cl2.hpp was not found.")
-        ELSE()
-            MESSAGE(STATUS "Found OpenCL C++ bindings.")
-        ENDIF()
-
-=======================================================================
 ==./cmake/Protobuf.cmake
 =======================================================================
-# copy from cmake source code
+# This script is taken from
+# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
+# and modified to our compilation.
+
 function(PROTOBUF_GENERATE_PYTHON OUTPUT)
     if(NOT ARGN)
         message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called 
@@ -568,174 +418,6 @@ function(PROTOBUF_GENERATE_PYTHON OUTPUT)
 endfunction()
 
 =======================================================================
-==./cmake/Utils.cmake
-=======================================================================
-
-macro(swig_generate_cxx pylist_variable)
-    if(NOT EXISTS "${CMKAE_BINARY_DIR}/python")
-        execute_process(
-            COMMAND mkdir ${CMAKE_BINARY_DIR}/python
-            COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa
-            COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa/proto
-            ERROR_QUIET)
-    endif()
-    execute_process(
-        COMMAND swig -c++ -python -I${CMAKE_SOURCE_DIR}/include 
-        -outdir ${CMAKE_BINARY_DIR}/python/singa
-        ${ARGN})
-
-    set(${pylist_variable} "${CMAKE_SOURCE_DIR}/src/python/swig/singa_wrap.cxx")
-endmacro()
-
-function (create_symlinks)
-    # Do nothing if building in-source
-    if (${CMAKE_CURRENT_BINARY_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
-        return()
-    endif()
-
-    foreach (path_file ${ARGN})
-        get_filename_component(folder ${path_file} PATH)
-
-        # Create REAL folder
-        file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/${folder}")
-
-        # Delete symlink if it exists
-        file(REMOVE "${CMAKE_BINARY_DIR}/${path_file}")
-
-        # Get OS dependent path to use in `execute_process`
-        file(TO_NATIVE_PATH "${CMAKE_BINARY_DIR}/${path_file}" link)
-        file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${path_file}" target)
-
-        if (UNIX)
-            set(command ln -s ${target} ${link})
-        else()
-            set(command cmd.exe /c mklink ${link} ${target})
-        endif()
-
-        execute_process(COMMAND ${command} 
-                        RESULT_VARIABLE result
-                        ERROR_VARIABLE output)
-
-        if (NOT ${result} EQUAL 0)
-            message(FATAL_ERROR "Could not create symbolic link for: ${target} --> ${output}")
-        endif()
-
-
-=======================================================================
-==./cmake/Templates/singa_config.h.in
-=======================================================================
-// Source directory
-#define SOURCE_FOLDER "${PROJECT_SOURCE_DIR}"
-
-// Binaries director
-#define BINARY_FOLDER "${PROJECT_BINARY_DIR}"
-
-#cmakedefine CPU_ONLY
-
-#cmakedefine USE_CBLAS
-
-#cmakedefine USE_OPENCV
-// cuda
-#cmakedefine USE_CUDA
-
-#cmakedefine USE_CUDNN
-#cmakedefine CUDNN_VERSION_MAJOR @CUDNN_VERSION_MAJOR@
-#cmakedefine CUDNN_VERSION_MINOR @CUDNN_VERSION_MINOR@
-#cmakedefine CUDNN_VERSION_PATCH @CUDNN_VERSION_PATCH@
-#cmakedefine CUDNN_VERSION_SWIG @CUDNN_VERSION_SWIG@
-
-#cmakedefine USE_OPENCL
-
-#cmakedefine ENABLE_DIST
-
-// lmdb
-#cmakedefine USE_LMDB
-
-
-=======================================================================
-==./cmake/Thirdparty/FindCBLAS.cmake
-=======================================================================
-
-FIND_PATH(CBLAS_INCLUDE_DIR NAMES cblas.h PATHS "$ENV{CBLAS_DIR}/include")
-FIND_LIBRARY(CBLAS_LIBRARIES NAMES openblas PATHS "$ENV{CBLAS_DIR}/lib")
-
-INCLUDE(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(CBLAS DEFAULT_MSG CBLAS_INCLUDE_DIR CBLAS_LIBRARIES)
-
-IF(CBLAS_FOUND)
-    #    MESSAGE(STATUS "Found cblas at ${CBLAS_INCLUDE_DIR}")
-    MARK_AS_ADVANCED(CBLAS_INCLUDE_DIR CBLAS_LIBRARIES)
-ENDIF()
-
-=======================================================================
-==./cmake/Thirdparty/FindCUDNN.cmake
-=======================================================================
-
-FIND_PATH(CUDNN_INCLUDE_DIR NAME "cudnn.h" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
-FIND_LIBRARY(CUDNN_LIBRARIES NAME "libcudnn.so" PATHS "$ENV{CMAKE_LIBRARY_PATH}")
-
-#message("cudnn include path:${CUDNN_INCLUDE_DIR}  lib path: ${CUDNN_LIBRARIES}")
-#message("env include path:$ENV{CUDNN_DIR} next: $ENV{CMAKE_INCLUDE_PATH}")
-INCLUDE(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARIES)
-
-IF(CUDNN_FOUND)
-    FILE(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
-    STRING(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
-        CUDNN_VERSION_MAJOR "${CUDNN_VERSION_FILE_CONTENTS}")
-    STRING(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
-        CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
-    STRING(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
-        CUDNN_VERSION_MINOR "${CUDNN_VERSION_FILE_CONTENTS}")
-    STRING(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
-        CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
-    STRING(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
-        CUDNN_VERSION_PATCH "${CUDNN_VERSION_FILE_CONTENTS}")
-    STRING(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
-        CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
-
-    IF(NOT CUDNN_VERSION_MAJOR)
-        SET(CUDNN_VERSION "???")
-    ELSE()
-      MATH(EXPR CUDNN_VERSION_SWIG "${CUDNN_VERSION_MAJOR} * 1000 + ${CUDNN_VERSION_MINOR} * 100 + ${CUDNN_VERSION_PATCH}")
-    ENDIF()
-    MESSAGE(STATUS "Found Cudnn_v${CUDNN_VERSION_SWIG} at ${CUDNN_INCLUDE_DIR} ${CUDNN_LIBRARIES}")
-    MARK_AS_ADVANCED(CUDNN_INCLUDE_DIR CUDNN_LIBRARIES)
-
-ENDIF()
-
-=======================================================================
-==./cmake/Thirdparty/FindGlog.cmake
-=======================================================================
-
-FIND_PATH(GLOG_INCLUDE_DIR NAMES glog/logging.h PATHS "$ENV{GLOG_DIR}/include")
-FIND_LIBRARY(GLOG_LIBRARIES NAMES glog)
-
-INCLUDE(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(GLOG DEFAULT_MSG GLOG_INCLUDE_DIR GLOG_LIBRARIES)
-
-IF(GLOG_FOUND)
-    #    MESSAGE(STATUS "Found glog at ${GLOG_INCLUDE_DIR}")
-    MARK_AS_ADVANCED(GLOG_INCLUDE_DIR GLOG_LIBRARIES)
-ENDIF()
-
-=======================================================================
-==./cmake/Thirdparty/FindLMDB.cmake
-=======================================================================
-
-FIND_PATH(LMDB_INCLUDE_DIR NAMES lmdb.h PATHS "$ENV{LMDB_DIR}/include")
-FIND_LIBRARY(LMDB_LIBRARIES NAMES lmdb PATHS "$ENV{LMDB_DIR}/include")
-
-INCLUDE(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(LMDB DEFAULT_MSG LMDB_INCLUDE_DIR LMDB_LIBRARIES)
-
-IF(LMDB_FOUND)
-    MESSAGE(STATUS "Found lmdb at ${LMDB_INCLUDE_DIR}")
-    MARK_AS_ADVANCED(LMDB_INCLUDE_DIR LMDB_LIBRARIES)
-    
-ENDIF()
-
-=======================================================================
 ==./cmake/Thirdparty/FindOpenCL.cmake
 =======================================================================
 # This script was taken from https://github.com/elhigu/cmake-findopencl
@@ -790,149 +472,6 @@ ELSE (APPLE)
 		# Nvidia
 
 =======================================================================
-==./examples/CMakeLists.txt
-=======================================================================
-ADD_SUBDIRECTORY(cifar10)
-ADD_SUBDIRECTORY(imagenet)
-
-=======================================================================
-==./examples/index.rst
-=======================================================================
-Examples
-========
-
-.. toctree::
-
-   cifar10/README
-   char-rnn/README
-   imagenet/README
-
-
-
-=======================================================================
-==./examples/cifar10/CMakeLists.txt
-=======================================================================
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
-
-IF(USE_CUDNN)
-ADD_EXECUTABLE(alexnet alexnet.cc)
-ADD_DEPENDENCIES(alexnet singa_core singa_model singa_utils)
-TARGET_LINK_LIBRARIES(alexnet singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
-
-ADD_EXECUTABLE(alexnet-parallel alexnet-parallel.cc)
-ADD_DEPENDENCIES(alexnet-parallel singa_core singa_model singa_utils)
-TARGET_LINK_LIBRARIES(alexnet-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
-SET_TARGET_PROPERTIES(alexnet-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
-
-ADD_EXECUTABLE(vgg-parallel vgg-parallel.cc)
-ADD_DEPENDENCIES(vgg-parallel singa_core singa_model singa_utils)
-TARGET_LINK_LIBRARIES(vgg-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
-SET_TARGET_PROPERTIES(vgg-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
-ENDIF(USE_CUDNN)
-
-=======================================================================
-==./examples/cifar10/download_data.py
-=======================================================================
-#!/usr/bin/env python
-import urllib
-import tarfile
-import os
-import sys
-import argparse
-
-
-def extract_tarfile(filepath):
-    if os.path.exists(filepath):
-        print 'The tar file does exist. Extracting it now..'
-        with tarfile.open(filepath, 'r') as f:
-            f.extractall('.')
-        print 'Finished!'
-        sys.exit(0)
-
-
-def check_dir_exist(dirpath):
-    if os.path.exists(dirpath):
-        print 'Directory %s does exist. To redownload the files, '\
-            'remove the existing directory and %s.tar.gz' % (dirpath, dirpath)
-        return True
-    else:
-        return False
-
-
-def do_download(dirpath, gzfile, url):
-    if check_dir_exist(dirpath):
-        sys.exit(0)
-    print 'Downloading CIFAR10 from %s' % (url)
-    urllib.urlretrieve(url, gzfile)
-    extract_tarfile(gzfile)
-    print 'Finished!'
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Download Cifar10 datasets')
-    parser.add_argument(
-        'file',
-        type=str,
-        choices=['py', 'bin'])
-    args = parser.parse_args()
-    if args.file == 'bin':
-        dirpath = 'cifar-10-batches-bin'
-        gzfile = 'cifar-10-binary' + '.tar.gz'
-        url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
-        do_download(dirpath, gzfile, url)
-    else:
-        dirpath = 'cifar-10-batches-py'
-        gzfile = 'cifar-10-python' + '.tar.gz'
-
-=======================================================================
-==./examples/cifar10/run-parallel.sh
-=======================================================================
-#!/usr/bin/env sh
-../../build/bin/alexnet-parallel -epoch 4
-#../../build/bin/vgg-parallel -epoch 4
-
-=======================================================================
-==./examples/cifar10/run.sh
-=======================================================================
-#!/usr/bin/env sh
-../../build/bin/alexnet -epoch 140
-
-=======================================================================
-==./examples/imagenet/CMakeLists.txt
-=======================================================================
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
-
-IF(USE_CUDNN)
-  IF(USE_OPENCV)
-  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp ")
-    ADD_EXECUTABLE(imagenet alexnet.cc)
-    ADD_DEPENDENCIES(imagenet singa_core singa_model singa_utils singa_io)
-    TARGET_LINK_LIBRARIES(imagenet singa_core singa_utils singa_model singa_io protobuf ${SINGA_LIBKER_LIBS})
-
-    ADD_EXECUTABLE(createdata ilsvrc12.cc)
-    ADD_DEPENDENCIES(createdata singa_core singa_io singa_model singa_utils)
-    TARGET_LINK_LIBRARIES(createdata singa_core singa_utils singa_io singa_model protobuf ${SINGA_LIBKER_LIBS})
-    #SET_TARGET_PROPERTIES(createdata PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
-  ENDIF(USE_OPENCV)
-ENDIF(USE_CUDNN)
-
-=======================================================================
-==./examples/imagenet/create_data.sh
-=======================================================================
-#!/usr/bin/env sh
-../../build/bin/createdata -trainlist "imagenet/label/train.txt" -trainfolder "imagenet/ILSVRC2012_img_train" \
-  -testlist "imagenet/label/val.txt" -testfolder "imagenet/ILSVRC2012_img_val" -outdata "imagenet_data" -filesize 1280
-
-=======================================================================
-==./examples/imagenet/run.sh
-=======================================================================
-#!/usr/bin/env sh
-../../build/bin/imagenet -epoch 90 -lr 0.01 -batchsize 256 -filesize 1280 -ntrain 1281167 -ntest 50000 \
-  -data "imagenet_data" -pfreq 100 -nthreads 12
-
-=======================================================================
 ==./include/singa/utils/cuda_utils.h
 =======================================================================
 // from caffe include/caffe/util/device_alternative.hpp
@@ -987,50 +526,6 @@ inline const char* curandGetErrorString(curandStatus_t error) {
   case CURAND_STATUS_ALLOCATION_FAILED:
 
 =======================================================================
-==./include/singa/utils/timer.h
-=======================================================================
-#ifndef SINGA_UTILS_TIMER_H
-#define SINGA_UTILS_TIMER_H
-
-#include <chrono>
-
-namespace singa {
-
-/// For benchmarking the time cost of operations.
-class Timer {
- public:
-  typedef std::chrono::duration<int> Seconds;
-  typedef std::chrono::duration<int, std::milli> Milliseconds;
-  typedef std::chrono::duration<int, std::ratio<60 * 60>> Hours;
-  typedef std::chrono::duration<int, std::micro> Microseconds;
-
-  /// Init the internal time point to the current time
-  Timer() { Tick(); }
-  /// Reset the internal time point to the current time
-  void Tick() { last_ = std::chrono::high_resolution_clock::now(); }
-  /// Return the duration since last call to Tick() or since the creation of
-  /// Timer. The template arg must be from Second or Millisecond or Hour.
-  /// The returned value is the count of the time metric.
-  template <typename T = Milliseconds>
-  int Elapsed() const {
-    static_assert(std::is_same<T, Seconds>::value ||
-                      std::is_same<T, Milliseconds>::value ||
-                      std::is_same<T, Hours>::value ||
-                      std::is_same<T, Microseconds>::value,
-                  "Template arg must be Seconds | Milliseconds | Hours | Microseconds");
-    auto now  = std::chrono::high_resolution_clock::now();
-    return std::chrono::duration_cast<T>(now - last_).count();
-  }
-  /// Return the string rep of current wall time
-  // std::string CurrentTime();
-
- private:
-  std::chrono::high_resolution_clock::time_point last_;
-};
-}
-#endif
-
-=======================================================================
 ==./include/singa/utils/tinydir.h
 =======================================================================
 /*
@@ -1085,58 +580,220 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 
 =======================================================================
-==./src/CMakeLists.txt
+==./lib/cnmem/.git
+=======================================================================
+gitdir: ../../.git/modules/lib/cnmem
+
+=======================================================================
+==./lib/cnmem/CMakeLists.txt
+=======================================================================
+# CMakeLists to build the cnmem library.
+cmake_minimum_required(VERSION 2.8.8)
+project(cnmem)
+
+# We need CUDA to build that library.
+find_package(CUDA QUIET REQUIRED)
+include_directories(${CUDA_INCLUDE_DIRS})
+
+# Rules to build the cnmem library.
+include_directories(include)
+add_definitions(-DCNMEM_DLLEXPORT)
+add_library(cnmem SHARED src/cnmem.cpp)
+set_target_properties(cnmem PROPERTIES VERSION 1.0.0 SOVERSION 1)
+target_link_libraries(cnmem LINK_PUBLIC ${CUDA_LIBRARIES})
+install(TARGETS cnmem RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib)
+install(FILES include/cnmem.h DESTINATION include)
+
+# Add the tests.
+if(WITH_TESTS)
+
+  # Get Google tests.
+  find_package(GTest QUIET REQUIRED)
+  include_directories(${GTEST_INCLUDE_DIRS})
+  
+  # Build the executable.
+  add_executable(cnmem_tests tests/cnmem_tests.cpp)
+  if(MSVC)
+    if(MSVC_VERSION GREATER 1700) # Visual Studio 11 or more.
+      add_definitions(-DUSE_CPP_11)
+    endif(MSVC_VERSION GREATER 1700)
+  endif(MSVC)
+  if(CMAKE_COMPILER_IS_GNUCC)
+    add_definitions(-std=c++11 -DUSE_CPP_11)
+  endif(CMAKE_COMPILER_IS_GNUCC)
+  target_link_libraries(cnmem_tests LINK_PUBLIC cnmem ${CUDA_LIBRARIES} ${GTEST_LIBRARIES} -lpthread)
+  install(TARGETS cnmem_tests RUNTIME DESTINATION bin)
+  
+  # On Windows, we copy the Google test DLL to the bin folder.
+  if(MSVC)
+    get_filename_component(gtest_dll_path ${GTEST_LIBRARIES} DIRECTORY)
+    install(FILES ${gtest_dll_path}/gtest.dll DESTINATION bin)
+  endif(MSVC)
+
+endif(WITH_TESTS)
+
+
+=======================================================================
+==./lib/cnmem/include/cnmem.h
+=======================================================================
+/* ********************************************************************** 
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ********************************************************************** */
+#pragma once
+
+#ifdef __cplusplus
+#include "cstdio"
+#else
+#include "stdio.h"
+#endif
+#include "cuda_runtime_api.h"
+
+#if defined(_MSC_VER) || defined(WIN32)
+#ifdef CNMEM_DLLEXPORT
+#define CNMEM_API __declspec(dllexport)
+#else
+#define CNMEM_API __declspec(dllimport)
+#endif
+#else
+#ifdef CNMEM_DLLEXPORT
+#define CNMEM_API __attribute__((visibility ("default")))
+#else
+#define CNMEM_API
+#endif
+#endif
+
+
+=======================================================================
+==./lib/cnmem/src/cnmem.cpp
 =======================================================================
-# generate protobuf sources 
-
-FILE(GLOB proto_files proto/*.proto) 
-protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files})
-IF (USE_PYTHON)
-    protobuf_generate_python(proto_pys ${proto_files})
-ENDIF()
-INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include")
-
-#message(STATUS "include: ${CMAKE_BINARY_DIR} ")
-#message(STATUS "srcs: ${proto_srcs}")
-#message(STATUS "hdrs: ${proto_hdrs}")
-#message(STATUS "pys: ${proto_pys}")
-ADD_LIBRARY(singa_proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_pys})
-FOREACH(fil ${proto_hdrs})
-    ADD_CUSTOM_COMMAND(
-        TARGET singa_proto PRE_BUILD
-        COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto"
-        COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto"
-        #COMMAND ${CMAKE_COMMAND} -E echo "copy done"
-        )
-ENDFOREACH()
-LIST(APPEND SINGA_LINKER_LIBS singa_proto)
-
-SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS})
-
-#FILE(GLOB_RECURSE utils_source ${CMAKE_CURRENT_SOURCE_DIR}/utils/ "*.cc")
-AUX_SOURCE_DIRECTORY(utils utils_source)
-#message(STATUS "UTILS ${utils_source}")
-ADD_LIBRARY(singa_utils SHARED ${utils_source})
-TARGET_LINK_LIBRARIES(singa_utils ${SINGA_LINKER_LIBS})
-LIST(APPEND SINGA_LINKER_LIBS singa_utils)
-
-#FILE(GLOB_RECURSE core_source ${CMAKE_CURRENT_SOURCE_DIR}/core/ "*.cc")
-AUX_SOURCE_DIRECTORY(core/device core_source)
-AUX_SOURCE_DIRECTORY(core/memory core_source)
-AUX_SOURCE_DIRECTORY(core/scheduler core_source)
-AUX_SOURCE_DIRECTORY(core/tensor core_source)
-IF (USE_CUDA)
-    FILE(GLOB_RECURSE cuda_source core "*.cu")
-    SET(FLAGS_BACKUP ${CMAKE_CXX_FLAGS})
-    SET(CMAKE_CXX_FLAGS "")
-    IF (CMAKE_BUILD_TYPE MATCHES DEBUG)
-        CUDA_COMPILE(cuda_objs SHARED ${cuda_source}
-            OPTIONS "-Xcompiler -fPIC -G -g")
-    ELSE (CMAKE_BUILD_TYPE MATCHES  DEBUG)
-        CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC")
-    ENDIF (CMAKE_BUILD_TYPE MATCHES DEBUG)
-    include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor")
-    SET(CMAKE_CXX_FLAGS ${FLAGS_BACKUP})
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "cnmem.h"
+#include <cstddef>
+#include <vector>
+#include <cuda_runtime_api.h>
+
+#if !defined(WIN32) && defined(_MSC_VER)
+#define WIN32
+#endif
+
+#ifdef WIN32
+#include <Windows.h>
+#else
+#include <pthread.h>
+#endif
+
+#if defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4 // ARMv7 is the only 32-bit target that we support.
+#define CNMEM_BUILD_WITH_32_BIT_POINTERS
+#endif
+
+#define CNMEM_GRANULARITY 512
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+=======================================================================
+==./lib/cnmem/tests/cnmem_tests.cpp
+=======================================================================
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <gtest/gtest.h>
+#include <cnmem.h>
+#include <fstream>
+#ifdef USE_CPP_11
+#include <thread>
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+static std::size_t getFreeMemory() {
+    cudaFree(0);
+    std::size_t freeMem, totalMem;
+    cudaMemGetInfo(&freeMem, &totalMem);
+    return freeMem;
+}
+
+class CnmemTest : public ::testing::Test {
+    /// We determine the amount of free memory.
+    std::size_t mFreeMem;
+    
+protected:
+    /// Do we test memory leaks.
 
 =======================================================================
 ==./src/core/tensor/distribution.cl
@@ -1193,64 +850,6 @@ inline threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, threefry4x32_ctr_
     X.v[0] = in.v[0];
 
 =======================================================================
-==./src/python/setup.py.in
-=======================================================================
-# Always prefer setuptools over distutils
-from setuptools import setup
-
-
-setup(
-    name='singa',
-
-    version='${PACKAGE_VERSION}',
-
-    description='A General Deep Learning System',
-
-    url='https://github.com/apache/incubator-singa',
-
-    author='Apache SINGA (incubating)',
-    author_email='dev@singa.incubator.apache.org',
-
-    license='Apache 2',
-
-    classifiers=[
-        #   3 - Alpha
-        #   4 - Beta
-        #   5 - Production/Stable
-        'Development Status :: 3 - Alpha',
-
-        'Intended Audience :: Developers',
-        'Topic :: Deep Learning System ',
-
-        'License :: Apache License',
-
-        # Specify the Python versions you support here. In particular, ensure
-        # that you indicate whether you support Python 2, Python 3 or both.
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.6',
-        'Programming Language :: Python :: 2.7',
-        ],
-
-    keywords='deep learning singa apache',
-
-    packages= ['singa', 'singa.proto'],
-
-    #py_modules=["singa"],
-
-    install_requires=[
-        'numpy>=1.11.0',
-        'protobuf>=2.5.0,<3'
-        ],
-
-    #List additional groups of dependencies here (e.g. development
-    #dependencies). You can install these using the following syntax,
-    #for example:
-
-=======================================================================
-==./src/python/singa/__init__.py
-=======================================================================
-
-=======================================================================
 ==./src/python/swig/numpy.i
 =======================================================================
 /* -*- C -*-  (not really, but good for syntax highlighting) */
@@ -1305,43 +904,6 @@ setup(
 {
 
 =======================================================================
-==./test/CMakeLists.txt
-=======================================================================
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
-
-IF(ENABLE_DIST)
-  ADD_EXECUTABLE(test_ep "singa/test_ep.cc")
-  ADD_DEPENDENCIES(test_ep singa_io)
-  TARGET_LINK_LIBRARIES(test_ep singa_utils singa_io protobuf ${SINGA_LINKER_LIBS})
-ENDIF()
-
-ADD_LIBRARY(gtest STATIC EXCLUDE_FROM_ALL "gtest/gtest.h" "gtest/gtest-all.cc")
-
-AUX_SOURCE_DIRECTORY(singa singa_test_source)
-LIST(REMOVE_ITEM singa_test_source "singa/test_ep.cc")
-
-IF(NOT USE_OPENCL)
-    MESSAGE(STATUS "Skipping OpenCL tests")
-    LIST(REMOVE_ITEM singa_test_source "singa/test_opencl.cc")
-ENDIF()
-
-
-ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
-ADD_DEPENDENCIES(test_singa singa_core singa_utils)
-#MESSAGE(STATUS "link libs" ${singa_linker_libs})
-TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
-    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
-IF(UNIX AND (NOT APPLE))
-    LIST(APPEND LINK_FLAGS "-pthread")
-ENDIF()
-SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
-
-=======================================================================
-==./test/gtest/CMakeLists.txt
-=======================================================================
-
-=======================================================================
 ==./test/gtest/gtest-all.cc
 =======================================================================
 // Copyright 2008, Google Inc.
@@ -1492,216 +1054,6 @@ GTEST_API_ int main(int argc, char **argv) {
 }
 
 =======================================================================
-==./test/python/test_layer.py
-=======================================================================
-import sys
-import os
-import unittest
-import numpy as np
-
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-
-from singa import layer
-from singa import device
-from singa import tensor
-from singa.proto import model_pb2
-
-
-def _tuple_to_string(t):
-    lt = [str(x) for x in t]
-    return '(' + ', '.join(lt) + ')'
-
-
-class TestPythonLayer(unittest.TestCase):
-
-    def check_shape(self, actual, expect):
-        self.assertEqual(actual, expect, 'shape mismatch, actual shape is %s'
-                         ' exepcted is %s' % (_tuple_to_string(actual),
-                                              _tuple_to_string(expect))
-                         )
-
-    def setUp(self):
-        layer.engine='singacpp'
-        self.w = {'init': 'Xavier', 'regularizer': 1e-4}
-        self.b = {'init': 'Constant', 'value': 0}
-        self.sample_shape = None
-
-    def test_conv2D_shape(self):
-        in_sample_shape = (3, 224, 224)
-        conv = layer.Conv2D('conv', 64, 3, 1, W_specs=self.w, b_specs=self.b,
-                            input_sample_shape=in_sample_shape)
-        out_sample_shape = conv.get_output_sample_shape()
-        self.check_shape(out_sample_shape, (64, 224, 224))
-
-    def test_conv2D_forward_backward(self):
-        in_sample_shape = (1, 3, 3)
-        conv = layer.Conv2D('conv', 1, 3, 2, W_specs=self.w, b_specs=self.b,
-                            pad=1, input_sample_shape=in_sample_shape)
-        # cuda = device.create_cuda_gpu()
-        # conv.to_device(cuda)
-        params = conv.param_values()
-
-        raw_x = np.arange(9, dtype=np.float32) + 1
-        x = tensor.from_numpy(raw_x)
-        x.reshape((1, 1, 3, 3))
-
-=======================================================================
-==./test/singa/test_layer.cc
-=======================================================================
-#include "gtest/gtest.h"
-#include "singa/model/layer.h"
-#include "singa/singa_config.h"
-
-TEST(Layer, CreateLayer) {
-  std::vector<std::string> types{
-      "convolution", "dense", "dropout", "relu", "batchnorm",
-      "flatten",     "lrn",   "pooling", "prelu",      "softmax"};
-  for (auto type : types) {
-    auto layer = singa::CreateLayer("singacpp_" + type);
-    // EXPECT_EQ(layer->layer_type(), type);
-  }
-}
-
-#ifdef USE_CUDNN
-TEST(Layer, CreateCudnnLayer) {
-  std::vector<std::string> types{
-      "convolution", "dropout", "relu", "batchnorm",
-      "lrn",   "pooling", "softmax"};
-#if CUDNN_VERSION_MAJOR >= 5
-  types.push_back("dropout");
-#endif
-  for (auto type : types) {
-    auto layer = singa::CreateLayer("cudnn_" + type);
-    // EXPECT_EQ(layer->layer_type(), type);
-  }
-}
-#endif
-
-=======================================================================
-==./test/singa/test_tensor.cc
-=======================================================================
-#include "gtest/gtest.h"
-#include "singa/core/tensor.h"
-using singa::Tensor;
-using singa::Shape;
-using singa::Device;
-
-TEST(TensorTest, TestConstructor) {
-  singa::Tensor float_t(singa::Shape{2,3});
-  EXPECT_EQ(6u, float_t.Size());
-  EXPECT_EQ(sizeof(float) * 6, float_t.MemSize());
-  EXPECT_EQ(singa::kFloat32, float_t.data_type());
-  auto s = float_t.shape();
-  EXPECT_EQ(s[0], 2u);
-  EXPECT_EQ(s[1], 3u);
-
-  EXPECT_NE(float_t.device(), nullptr);
-
-  singa::Tensor float16_t(Shape{2,3}, singa::kFloat16);
-  EXPECT_EQ(singa::kFloat16, float16_t.data_type());
-  EXPECT_EQ(6u, float16_t.Size());
-  EXPECT_EQ(12u, float16_t.block()->size());
-
-  singa::Tensor x(float16_t);
-  EXPECT_EQ(float16_t.Size(), x.Size());
-  EXPECT_EQ(float16_t.block(), x.block());
-  EXPECT_EQ(float16_t.data_type(), x.data_type());
-  EXPECT_EQ(float16_t.device(), x.device());
-
-  singa::Tensor y = float16_t;
-  EXPECT_EQ(float16_t.Size(), x.Size());
-  EXPECT_EQ(float16_t.block(), x.block());
-  EXPECT_EQ(float16_t.data_type(), x.data_type());
-  EXPECT_EQ(float16_t.device(), x.device());
-}
-
-TEST(TensorClass, Reshape) {
-  Tensor t;
-  t.Reshape(Shape{2,3});
-  EXPECT_TRUE((Shape{2,3} == t.shape()));
-
-  t.Reshape(Shape{3,3, 4});
-  EXPECT_TRUE((Shape{3,3, 4} == t.shape()));
-
-  t.Reshape(Shape{12});
-  EXPECT_TRUE((Shape{12} == t.shape()));
-
-  Tensor o;
-  EXPECT_TRUE(o.shape() != t.shape());
-  o.Reshape(Shape{3, 3});
-  EXPECT_TRUE(o.shape() != t.shape());
-
-=======================================================================
-==./test/singa/test_tensor_math.cc
-=======================================================================
-#include "gtest/gtest.h"
-#include "singa/core/tensor.h"
-using singa::Tensor;
-using singa::Shape;
-using singa::Device;
-
-class TestTensorMath : public ::testing::Test {
- protected:
-  virtual void SetUp() {
-    a.Reshape(singa::Shape{6});
-    b.Reshape(singa::Shape{6});
-    c.Reshape(singa::Shape{6, 1});
-    d.Reshape(singa::Shape{3, 2});
-    e.Reshape(singa::Shape{3, 2});
-
-    a.CopyDataFromHostPtr<float>(dat1, 6);
-    b.CopyDataFromHostPtr<float>(dat2, 6);
-    e.CopyDataFromHostPtr<float>(dat1, 6);
-  }
-  Tensor a, b, c, d, e;
-  const float dat1[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
-  const float dat2[6] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f};
-};
-
-TEST_F(TestTensorMath, MemberAbs) {
-  Tensor aa = a.Clone();
-  Tensor bb = b.Clone();
-  Tensor cc = aa - bb;
-  const float *dptr = cc.data<float>();
-  EXPECT_NEAR(-0.1, dptr[0], 1e-5);
-  EXPECT_NEAR(-0.1, dptr[1], 1e-5);
-  EXPECT_NEAR(-0.1, dptr[2], 1e-5);
-
-  Tensor p = Abs(cc);
-  const float *dptr1 = p.data<float>();
-  EXPECT_NEAR(0.1, dptr1[0], 1e-5);
-  EXPECT_NEAR(0.1, dptr1[1], 1e-5);
-  EXPECT_NEAR(0.1, dptr1[2], 1e-5);
-}
-
-TEST_F(TestTensorMath, MemberExp) {
-  Tensor p = Exp(a);
-  const float *dptr1 = p.data<float>();
-  EXPECT_NEAR(exp(1.0f), dptr1[0], 1e-5);
-  EXPECT_NEAR(exp(2.0f), dptr1[1], 1e-5);
-  EXPECT_NEAR(exp(3.0f), dptr1[2], 1e-5);
-}
-
-TEST_F(TestTensorMath, MemberLog) {
-  Tensor p = Log(a);
-
-=======================================================================
-==./test/singa/test_timer.cc
-=======================================================================
-#include "gtest/gtest.h"
-#include "singa/utils/timer.h"
-
-#include <chrono>
-#include <thread>
-
-TEST(TimerTest, TestTick) {
-  singa::Timer t;
-  std::this_thread::sleep_for(std::chrono::milliseconds(1000));
-  int time = t.Elapsed<singa::Timer::Milliseconds>();
-  EXPECT_GE(time, 1000);
-}
-
-=======================================================================
 ==./tool/cpplint.py
 =======================================================================
 #!/usr/bin/env python

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4579a67..425986f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 # generate protobuf sources 
 
 FILE(GLOB proto_files proto/*.proto) 
@@ -95,7 +115,7 @@ IF(USE_PYTHON)
     create_symlinks(${python_source_files})
     
     SET(python_cxxs "${core_source};${io_source};${model_source};${utils_source}")
-    ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${python_cxxs} ${cuda_objs})
+    ADD_LIBRARY(_singa_wrap SHARED "${python_srcs} ${python_cxxs} ${cuda_objs}")
     SET(WRAPPER_LINKER_LIBS "${PREVIOUS_LINKER_LIBS}")
     TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS})
     TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/src/python/setup.py.in
----------------------------------------------------------------------
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index f2cd9f3..004222c 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 # Always prefer setuptools over distutils
 from setuptools import setup
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index e69de29..750eb60 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -0,0 +1,21 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 593cfd6..7928bc1 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/gtest/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index e69de29..750eb60 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -0,0 +1,21 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index 441f352..f98a3c0 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -1,3 +1,23 @@
+#
+# Copyright 2015 The Apache Software Foundation
+# 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
 import sys
 import os
 import unittest

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/singa/test_layer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_layer.cc b/test/singa/test_layer.cc
index bb33dba..c913a17 100644
--- a/test/singa/test_layer.cc
+++ b/test/singa/test_layer.cc
@@ -1,3 +1,23 @@
+/**
+ * Copyright 2015 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "gtest/gtest.h"
 #include "singa/model/layer.h"
 #include "singa/singa_config.h"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/singa/test_tensor.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc
index f6f2ca3..a6d6bab 100644
--- a/test/singa/test_tensor.cc
+++ b/test/singa/test_tensor.cc
@@ -1,3 +1,23 @@
+/**
+ * Copyright 2015 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "gtest/gtest.h"
 #include "singa/core/tensor.h"
 using singa::Tensor;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc
index 2803a23..0c0b4f8 100644
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@ -1,3 +1,23 @@
+/**
+ * Copyright 2015 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "gtest/gtest.h"
 #include "singa/core/tensor.h"
 using singa::Tensor;

[30/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

1. copy the 'examples' folder into docs/ to generate htmls files using the README.md files
2. add software_stack.md to describe the major data structures of v1.0
3. add device.rst to introduce the Device APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e963363a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e963363a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e963363a

Branch: refs/heads/master
Commit: e963363a6d99825d8f4472130559814347845194
Parents: 6b2ff3c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 11 23:13:12 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:02:47 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                      |   1 +
 doc/_static/images/singav1-sw.png | Bin 0 -> 24326 bytes
 doc/conf.py                       |  18 +++---
 doc/docs.rst                      |   6 +-
 doc/docs/device.rst               |  38 +++++++++++++
 doc/docs/examples.rst             |   6 ++
 doc/docs/index.rst                |  16 ++----
 doc/docs/jp/index.md              |  23 --------
 doc/docs/kr/index.md              |  23 --------
 doc/docs/software_stack.md        |  99 +++++++++++++++++++++++++++++++++
 doc/docs/tensor.rst               |  54 ++++++++++++++++++
 doc/docs/zh/index.md              |  10 ++--
 doc/index.rst                     |  28 +++++-----
 examples/index.rst                |   6 ++
 src/python/singa/device.py        |  31 +++++++++++
 src/python/singa/tensor.py        |  49 +++++++++++++---
 16 files changed, 311 insertions(+), 97 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index 62a2236..c6eddf1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -50,6 +50,7 @@ clean:
 
 .PHONY: html
 html:
+	cp -rf ../examples docs/
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/_static/images/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singav1-sw.png b/doc/_static/images/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/_static/images/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..9d4480e 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,9 +16,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -34,9 +35,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-   
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -49,7 +49,7 @@ source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 #
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
@@ -149,7 +149,7 @@ html_theme = 'sphinx_rtd_theme'
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -202,7 +202,7 @@ html_static_path = ['_static']
 
 # If true, links to the reST sources are added to the pages.
 #
-html_show_sourcelink = False
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
index 2ebea60..400b12a 100644
--- a/doc/docs.rst
+++ b/doc/docs.rst
@@ -2,7 +2,5 @@ Documentation
 =============
 
 .. toctree::
-	docs/index
- 	docs/zh/index
-	docs/jp/index
-	docs/kr/index
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
new file mode 100644
index 0000000..e79d87a
--- /dev/null
+++ b/doc/docs/device.rst
@@ -0,0 +1,38 @@
+Device
+=======
+
+
+The Device abstract represents any hardware device with memory and compuation units.
+All [Tensor operations](tensor.html) are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimization
+of memory and execution are implemented in the Device class.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implmentations,
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Python API
+----------
+
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
+
+
+The following code provides examples of creating devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
new file mode 100644
index 0000000..b0b2af8
--- /dev/null
+++ b/doc/docs/examples.rst
@@ -0,0 +1,6 @@
+Examples
+========
+
+.. toctree::
+
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index a6a1b49..2f6352e 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -2,15 +2,9 @@ English
 =======
 
 .. toctree::
-	overview
-        installation
-	quick-start
-        programming-guide
-        distributed-training
-        data
-        checkpoint
-        python
-        test
-        gpu
-        examples
 
+   installation
+   software_stack
+   device
+   tensor
+   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/jp/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/jp/index.md b/doc/docs/jp/index.md
deleted file mode 100644
index 6679198..0000000
--- a/doc/docs/jp/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# \u6700\u65b0\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8
-
----
-
-* [\u30a4\u30f3\u30c8\u30ed\u30c0\u30af\u30b7\u30e7\u30f3](overview.html)
-* [\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb](installation.html)
-* [\u30af\u30a4\u30c3\u30af\u30b9\u30bf\u30fc\u30c8](quick-start.html)
-* [\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0 \u30ac\u30a4\u30c9](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [\u5206\u6563 \u30c8\u30ec\u30fc\u30cb\u30f3\u30b0](distributed-training.html)
-* [\u30c7\u30fc\u30bf\u306e\u6e96\u5099](data.html)
-* [Checkpoint \u3068 Resume](checkpoint.html)
-* [\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u30c6\u30b9\u30c8 \u3068 \u7279\u5fb4\u62bd\u51fa](test.html)
-* [\u30b5\u30f3\u30d7\u30eb](examples.html)
-    * Feed-forward \u30e2\u30c7\u30eb
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/kr/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/kr/index.md b/doc/docs/kr/index.md
deleted file mode 100644
index 990d5d9..0000000
--- a/doc/docs/kr/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# \ucd5c\uc2e0 \ubb38\uc11c
-
----
-
-* [\uac1c\uc694](overview.html)
-* [\uc778\uc2a4\ud1a8](installation.html)
-* [\ud035 \uc2a4\ud0c0\ud2b8](quick-start.html)
-* [\ud504\ub85c\uadf8\ub798\ubc0d \uac00\uc774\ub4dc](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [\ubd84\uc0b0 \ud2b8\ub808\uc774\ub2dd](distributed-training.html)
-* [\ub370\uc774\ud130 \uc900\ube44](data.html)
-* [Checkpoint \uc640 Resume](checkpoint.html)
-* [\uc131\ub2a5\ud14c\uc2a4\ud2b8 \ubc0f \ud2b9\uc9d5\ucd94\ucd9c](test.html)
-* [\uc0d8\ud50c](examples.html)
-    * Feed-forward \ubaa8\ub378
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; The
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, optimizers/initializer/metric/loss for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
+tensor operations on its execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA have implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents normal GPU card from both Nvidia and AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for tensor operations,
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users would create a device instance and pass it to create multiple
+tensor instances. When users call the Tensor functions, these function would invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
+including deep learning models and other traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware device
+using different programming languages. A new hardware device would be supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory could be implemented by Device, which
+manages both operation execution and memory malloc/free. More optimization details
+would be described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules.
+
+* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t the training loss functions.
+  They wraps the complex layer operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide variant methods of initializing
+  model parameters (stored in Tensor instances), following Uniform, Gaussian, etc.
+
+* [Loss](loss.html) and its subclasses defines the training objective loss functions.
+  Both functions of computing the loss values and computing the gradient of the prediction w.r.t the
+  objective loss are implemented. Example loss functions include squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide the function to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp etc.
+
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing.
+
+* Reader and its subclasses load string records from disk files
+* Writer and its subclasses write string records to disk files
+* Encoder and its subclasses encode Tensor instances into string records
+* Decoder and its subclasses decodes string records into Tensor instances
+* Endpoint represents a communication endpoint which provides functions for passing messages to each other.
+* Message represents communication messages between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..87d26ea
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,54 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without user
+awareness. Note that users need to make sure the tensor operands are
+allocated on the same device except copy functions.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implmentations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppGPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+There are two set of tensor functions,
+1. Tensor member functions, which would change the internal state of the Tensor instance.
+2. tensor module functions, which accepts Tensor instances as arguments and return
+Tensor instances.
+
+
+Create Tensor instances
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: singa.tensor.Tensor
+
+
+Tensor instances can be constructed from Numpy array,
+
+.. automodule:: singa.tensor
+   :members: from_numpy
+
+
+Set Tensor values
+~~~~~~~~~~~~~~~~~
+
+
+
+
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
index c44a2cf..4b49d5f 100644
--- a/doc/docs/zh/index.md
+++ b/doc/docs/zh/index.md
@@ -1,7 +1,9 @@
 SINGA \u4e2d\u6587\u6587\u6863
----
+==============
 
-* [\u7b80\u4ecb](overview.html)
-* [\u5b89\u88c5](installation_source.html)
-* [\u4f7f\u7528\u6307\u5357](programming-guide.html)
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
 =======================
 
 Recent News
-===========
+-----------
 
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_ 
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
 
 * The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
 
@@ -34,7 +34,7 @@ Recent News
 * SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
 
 Getting Started
-===============
+---------------
 * The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
 * Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
 
 Documentation
-=============
+-------------
 
 * Documentations are listed `here <docs.html>`_.
 
@@ -51,8 +51,8 @@ Documentation
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
-=================
-  
+----------------------
+
 * Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
 
 * If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
 More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
 
 Citing SINGA
-============
+------------
 
 Please cite the following two papers if you use SINGA in your research:
 
 * B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
 
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_). 
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
 
 .. toctree::
    :hidden:
-   
+
    downloads
    docs
 
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
    develop/how-contribute
    develop/contribute-code
    develop/contribute-docs
-   
+
 .. toctree::
    :hidden:
    :maxdepth: 2
    :caption: Community
-   
+
    community/source-repository
    community/mail-lists
    community/issue-tracking
    community/team-list
-   
+
 
 
 License
-=======
+----------
 SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
 
 Disclaimers
-===========
+-----------
 
 Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+   char-rnn/README
+   imagenet/README
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
 
 
 def create_cuda_gpus(num):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        num (int): number of device to create.
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
+
     return singa.Platform.CreateCudaGPUs(num)
 
 
 def create_cuda_gpu():
+    '''Create a single CudaGPU device.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
+
     return singa.Platform.CreateCudaGPUs(1)[0]
 
 
 def create_cuda_gpus_on(device_ids):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        device_ids (list): a list of GPU card IDs.
+
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
     return singa.Platform.CreateCudaGPUsOn(device_ids)
 
 
 def create_cuda_gpu_on(device_id):
+    '''Create a CudaGPU device on the given device ID.
+
+    Args:
+        device_id (int): GPU card ID.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
 
 def get_default_device():
+    '''Get the default host device which is a CppCPU device'''
     return singa.Platform.GetDefaultDevice()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index 6e84a4f..3645ff8 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -21,17 +21,21 @@ to call singa::Tensor and its methods
 """
 
 import numpy as np
+from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-from functools import reduce
+import device
 
 
 class Tensor(object):
-    ''' Class and member functions for singa::Tensor
-    '''
-
-    def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
-        ''' shape = (tuple)
+    def __init__(self, shape=None, device=None, dtype=core_pb.kFloat32):
+        '''Create a Py Tensor, which wraps a swig converted Tensor from SINGA
+            Tensor.
+        Args:
+            shape (list), a list of integers for the tensor shape. If shape is not specified, the created tensor is called a dummy tensor.
+            device, a Device instance created from ::py:mod:device. If it is
+                None, then the default host device would be used.
+            dtype, data type. currently, most operations only accept kFloat32.
         '''
         if shape is None:
             # call constructor of singa::Tensor
@@ -111,11 +115,19 @@ class Tensor(object):
         return self.singa_tensor.L1()
 
     def set_value(self, x):
+        '''Set all elements of the tensor to be the give value.
+
+        Args:
+            x (float), a float value to be set to all elements.
+        '''
         # assert type(x) == float, 'set value only accepts float input'
         # if isinstance(x, float):
         self.singa_tensor.floatSetValue(x)
 
     def copy_data(self, t):
+        '''Copy data from other Tensor instance.
+        '''
+        assert type(t) == Tensor, 't must be a singa Tensor instance'
         self.singa_tensor.CopyData(t.singa_tensor)
 
     def clone(self):
@@ -285,16 +297,35 @@ def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
 
 
 def from_numpy(np_array):
+    '''Create a Tensor instance with the shape, dtype and values from the numpy
+        array.
+
+    Args:
+        np_array: the numpy array.
+
+    Returns:
+        A Tensor instance allocated on the default CppCPU device.
+    '''
     ret = Tensor(np_array.shape)
     ret.copy_from_numpy(np_array)
     return ret
 
 
 def to_numpy(t):
-    ''' this method gets the values of tensor data and
-        returns it as numpy array
-        TODO(wangwei) clone t to host
+    '''Convert the tensor into a numpy array.
+
+    Since numpy array is allocated on CPU devices, the input Tensor instance
+    must be on the default CppCPU device.
+
+    Args:
+        t (Tensor), a Tensor on the default CppCPU device.
+
+    Returns:
+        a numpy array
     '''
+    assert t.device == device.get_default_device() or t.device == None, \
+        'Please move the tensor onto the default host device'
+
     if t.dtype == core_pb2.kFloat32:
         np_array = t.singa_tensor.floatGetValue(int(t.size()))
     elif t.dtype == core_pb2.kInt:

[29/51] [abbrv] incubator-singa git commit: Merge commits for debugging the gradient average error and commits for documentation.

Posted by wa...@apache.org.

Merge commits for debugging the gradient average error and commits for documentation.

Conflicts:
	examples/cifar10/alexnet.py
	src/python/singa/layer.py
	src/python/singa/optimizer.py


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5d20d353
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5d20d353
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5d20d353

Branch: refs/heads/master
Commit: 5d20d353bd09f2bd758f27f5e1851af7ae8d4123
Parents: 5db7eb6 6d4539e
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 20:31:21 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 20:31:21 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.cc   | 11 +++-----
 examples/cifar10/alexnet.py   | 53 +++++++-------------------------------
 examples/cifar10/train.py     | 19 +++++++-------
 src/model/feed_forward_net.cc |  6 ++---
 src/model/optimizer/sgd.cc    |  4 +--
 src/python/singa/layer.py     | 30 +++++++++++++++++----
 src/python/singa/net.py       |  8 +++++-
 src/python/singa/optimizer.py | 29 +++++++++++----------
 src/python/singa/tensor.py    |  8 +++---
 9 files changed, 80 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/examples/cifar10/alexnet.cc
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --cc examples/cifar10/alexnet.py
index 17b6a89,dae129f..02437b3
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@@ -35,54 -36,20 +35,21 @@@ def create_net(use_cpu=False)
      W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}
      W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
      W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
-     b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
-     net.add(
-         layer.Conv2D(
-             'conv1',
-             32,
-             5,
-             1,
-             W_specs=W0_specs.copy(),
-             b_specs=b_specs.copy(),
-             pad=2,
-             input_sample_shape=(
-                 3,
-                 32,
-                 32,
-             )))
++
+     b_specs = {'init': 'constant', 'value': 0, 'lr_mult': 2, 'decay_mult': 0}
+     net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
      net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
      net.add(layer.Activation('relu1'))
-     net.add(layer.LRN(name='lrn1'))
-     net.add(
-         layer.Conv2D(
-             'conv2',
-             32,
-             5,
-             1,
-             W_specs=W1_specs.copy(),
-             b_specs=b_specs.copy(),
-          pad=2))
+     net.add(layer.LRN(name='lrn1', size=3, alpha=5e-5))
+     net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
      net.add(layer.Activation('relu2'))
-     net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
-     net.add(layer.LRN('lrn2'))
-     net.add(
-         layer.Conv2D(
-             'conv3',
-             64,
-             5,
-             1,
-             W_specs=W1_specs.copy(),
-             b_specs=b_specs.copy(),
-          pad=2))
+     net.add(layer.AvgPooling2D('pool2', 3, 2,  pad=1))
+     net.add(layer.LRN('lrn2', size=3, alpha=5e-5))
+     net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
      net.add(layer.Activation('relu3'))
-     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
+     net.add(layer.AvgPooling2D('pool3', 3, 2, pad=1))
      net.add(layer.Flatten('flat'))
-     net.add(
-         layer.Dense(
-             'dense',
-             10,
-             W_specs=W2_specs.copy(),
-          b_specs=b_specs.copy()))
 -    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
++    net.add(layer.Dense( 'dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
      for (p, specs) in zip(net.param_values(), net.param_specs()):
          filler = specs.filler
          if filler.type == 'gaussian':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/src/python/singa/layer.py
----------------------------------------------------------------------
diff --cc src/python/singa/layer.py
index b0fdb5e,1e9caeb..a9f3826
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@@ -473,28 -388,29 +473,47 @@@ class LRN(Layer)
  
  
  class Dense(Layer):
 +    """Apply linear/affine transformation, also called inner-product or
 +    fully connected layer.
  
 +    Args:
 +        num_output (int): output feature length.
 +        use_bias (bool): add a bias vector or not to the transformed feature
 +        W_specs (dict): specs for the weight matrix
 +            'name' for parameter name
 +            'lr_mult' for learning rate multiplier
 +            'decay_mult' for weight decay multiplier
 +            'init' for init method, which could be 'gaussian', 'uniform',
 +            'xavier' and ''
 +            'std', 'mean', 'high', 'low' for corresponding init methods
 +            'clamp' for gradient constraint, value is scalar
 +            'regularizer' for regularization, currently support 'l2'
 +        b_specs (dict): specs for the bias vector, same fields as W_specs.
 +        W_transpose (bool): if true, output=x*W.T+b;
 +        input_sample_shape (tuple): input feature length
 +    """
      def __init__(self, name, num_output, use_bias=True,
                   W_specs=None, b_specs=None,
-                  W_transpose=True, input_sample_shape=None):
+                  W_transpose=False, input_sample_shape=None):
+         """Apply linear/affine transformation, also called inner-product or
+         fully connected layer.
+ 
+         Args:
+             num_output (int): output feature length.
+             use_bias (bool): add a bias vector or not to the transformed feature
+             W_specs (dict): specs for the weight matrix
+                 'name' for parameter name
+                 'lr_mult' for learning rate multiplier
+                 'decay_mult' for weight decay multiplier
+                 'init' for init method, which could be 'gaussian', 'uniform',
+                 'xavier' and ''
+                 'std', 'mean', 'high', 'low' for corresponding init methods
+                 'clamp' for gradient constraint, value is scalar
+                 'regularizer' for regularization, currently support 'l2'
+             b_specs (dict): specs for the bias vector, same fields as W_specs.
+             W_transpose (bool): if true, output=x*W.T+b;
+             input_sample_shape (tuple): input feature length
+         """
          super(Dense, self).__init__(name)
          conf = self.conf.dense_conf
          conf.num_output = num_output
@@@ -508,15 -424,12 +527,15 @@@
              W_specs['name'] = name + '_weight'
          if 'name' not in b_specs:
              b_specs['name'] = name + '_bias'
-         self.conf.param.extend([_construct_param_specs_from_dict(W_specs)])
-         self.param_specs.append(_construct_param_specs_from_dict(W_specs))
-         self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
-         self.param_specs.append(_construct_param_specs_from_dict(b_specs))
+         wspecs = _construct_param_specs_from_dict(W_specs)
+         bspecs = _construct_param_specs_from_dict(b_specs)
+         self.conf.param.extend([wspecs, bspecs])
+         self.param_specs.extend([wspecs, bspecs])
          # dense layer is transparent to engine.
 -        self.layer = _create_layer('singa', 'Dense')
 +        if engine == 'cudnn':
 +            self.layer = _create_layer('singacuda', 'Dense')
 +        else:
 +            self.layer = _create_layer(engine, 'Dense')
          if input_sample_shape is not None:
              self.setup(input_sample_shape)
  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --cc src/python/singa/optimizer.py
index 14cf3c0,32f03d4..74e6ade
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@@ -107,23 -100,24 +107,25 @@@ class Optimizer(object)
  
          Args:
              name (str): parameter name
 -            specs (ParamSpec): protobuf obj
 -        """
 -        assert type(specs) == model_pb2.ParamSpec, \
 +            specs (ParamSpec): protobuf obj, including regularizer and
 +                constraint, multipliers for learning rate and weight decay.
- 
 +        '''
 +        assert isinstance(specs, model_pb2.ParamSpec), \
              'specs should be model_pb2.ParamSpec instance'
          if specs.HasField('regularizer'):
              self.regularizers[name] = CppRegularizer(specs.regularizer)
+         elif specs.decay_mult != 1:
+             self.regularizers[name] = L2Regularizer(
+                 specs.decay_mult * self.regularizer.coefficient)
+ 
          if specs.HasField('constraint'):
              self.constraints[name] = CppConstraint(specs.constraint)
+ 
          if specs.lr_mult != 1:
              self.learning_rate_multiplier[name] = specs.lr_mult
-         if specs.decay_mult != 1:
-             self.decay_multiplier[name] = specs.decay_mult
  
 -    def apply_regularizer_constraint(self, value, grad, name=None, step=None):
 -        """Apply regularization and constraint if available.
 +    def apply_regularizer_constraint(self, epoch, value, grad, name=None):
 +        '''Apply regularization and constraint if available.
  
          If there are both global regularizer (constraint) and param specific
          regularizer (constraint), it would use the param specific one.
@@@ -189,32 -184,24 +191,27 @@@
  
  
  class SGD(Optimizer):
 +    '''The vallina Stochasitc Gradient Descent algorithm with momentum.
  
 -    def __init__(self, lr=None, momentum=None, decay=None):
 -        """The vallina Stochasitc Gradient Descent algorithm.
 +    See the base Optimizer for all arguments.
 +    '''
  
 -        See the base Optimizer for all arguments.
 -        """
 -        super(SGD, self).__init__(lr, momentum, decay)
 +    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
 +                 regularizer=None, constraint=None):
-         super(
-             SGD,
-             self).__init__(
-             lr,
-             momentum,
-             weight_decay,
-             lr_gen,
-             regularizer,
-          constraint)
++        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen,
++                                  regularizer, constraint)
          conf = model_pb2.OptimizerConf()
 -        if momentum is not None:
 -            conf.momentum = momentum
 +        if self.momentum is not None:
 +            conf.momentum = self.momentum
 +        conf.type = 'sgd'
          self.opt = singa.CreateOptimizer('SGD')
          self.opt.Setup(conf.SerializeToString())
  
 -    def apply_with_lr(self, step, lr, grad, value, name):
 -        self.apply_regularizer_constraint(value, grad, name, step)
 +    def apply_with_lr(self, epoch, lr, grad, value, name):
 +        self.apply_regularizer_constraint(epoch, value, grad, name)
+         if name is not None and name in self.learning_rate_multiplier:
+             lr = lr * self.learning_rate_multiplier[name]
 -        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
 +        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
          return value
  
  
@@@ -260,9 -240,11 +257,11 @@@ class AdaGrad(Optimizer)
          self.opt = singa.CreateOptimizer('AdaGrad')
          self.opt.Setup(conf.SerializeToString())
  
 -    def apply_with_lr(self, step, lr, grad, value, name):
 -        grad = self.apply_regularizer_constraint(step, value, grad, name)
 +    def apply_with_lr(self, epoch, lr, grad, value, name):
 +        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+         if name is not None and name in self.learning_rate_multiplier:
+             lr = lr * self.learning_rate_multiplier[name]
 -        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
 +        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
          return value
  
  
@@@ -286,9 -265,11 +285,11 @@@ class RMSProp(Optimizer)
          self.opt = singa.CreateOptimizer('RMSProp')
          self.opt.Setup(conf.SerializeToString())
  
 -    def apply_with_lr(self, step, lr, grad, value, name):
 -        grad = self.apply_regularizer_constraint(step, value, grad, name)
 +    def apply_with_lr(self, epoch, lr, grad, value, name):
 +        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+         if name is not None and name in self.learning_rate_multiplier:
+             lr = lr * self.learning_rate_multiplier[name]
 -        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
 +        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
          return value
  
  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/src/python/singa/tensor.py
----------------------------------------------------------------------

[10/51] [abbrv] incubator-singa git commit: SINGA-229 Complete install targets

Posted by wa...@apache.org.

SINGA-229 Complete install targets

Modify lib names.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/230230ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/230230ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/230230ce

Branch: refs/heads/master
Commit: 230230cea2d45cbb88a385ac6e84fe34959117e9
Parents: a0af465
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Sat Aug 13 15:57:04 2016 +0800
Committer: xiezl <xi...@comp.nus.edu.sg>
Committed: Sat Aug 13 15:57:04 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt      |  3 ++-
 src/CMakeLists.txt  | 10 +++++-----
 test/CMakeLists.txt |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/230230ce/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32d3b8e..994bd51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,5 +53,6 @@ ADD_SUBDIRECTORY(examples)
 INSTALL(DIRECTORY include/singa DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
 INSTALL(FILES ${CMAKE_BINARY_DIR}/include/singa/singa_config.h DESTINATION
     ${CMAKE_INSTALL_PREFIX}/include/singa)
-INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/lib DESTINATION ${CMAKE_INSTALL_PREFIX})
+INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/lib DESTINATION ${CMAKE_INSTALL_PREFIX}
+    PATTERN "*libgtest.a" EXCLUDE )
 #INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/bin DESTINATION ${CMAKE_INSTALL_PREFIX})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/230230ce/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 65a81fc..f6fa698 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,6 @@
-# generate protobuf sources
+# generate protobuf sources 
 
-FILE(GLOB proto_files proto/*.proto)
+FILE(GLOB proto_files proto/*.proto) 
 protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files})
 IF (USE_PYTHON)
     protobuf_generate_python(proto_pys ${proto_files})
@@ -10,16 +10,16 @@ INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include")
 #message(STATUS "srcs: ${proto_srcs}")
 #message(STATUS "hdrs: ${proto_hdrs}")
 #message(STATUS "pys: ${proto_pys}")
-ADD_LIBRARY(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_pys})
+ADD_LIBRARY(singa_proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_pys})
 FOREACH(fil ${proto_hdrs})
     ADD_CUSTOM_COMMAND(
-        TARGET proto PRE_BUILD
+        TARGET singa_proto PRE_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto"
         COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto"
         #COMMAND ${CMAKE_COMMAND} -E echo "copy done"
         )
 ENDFOREACH()
-LIST(APPEND SINGA_LINKER_LIBS proto)
+LIST(APPEND SINGA_LINKER_LIBS singa_proto)
 
 #FILE(GLOB_RECURSE utils_source ${CMAKE_CURRENT_SOURCE_DIR}/utils/ "*.cc")
 AUX_SOURCE_DIRECTORY(utils utils_source)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/230230ce/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 044d65a..1c2550b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -13,5 +13,5 @@ ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
 ADD_DEPENDENCIES(test_singa singa_core singa_utils)
 MESSAGE(STATUS "link libs" ${singa_linker_libs})
 TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
-    singa_io proto protobuf ${SINGA_LINKER_LIBS})
+    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
 SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")

[16/51] [abbrv] incubator-singa git commit: Merge PR #240 for training RBM againt MNIST

Posted by wa...@apache.org.

Merge PR #240 for training RBM againt MNIST


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/1db27841
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/1db27841
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/1db27841

Branch: refs/heads/master
Commit: 1db278417bd5e569ef69a2aa83af26ee6f609701
Parents: dffae6b 5b332a4
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Aug 14 17:10:37 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 17:10:37 2016 +0800

----------------------------------------------------------------------
 examples/mnist/README.md           |  18 +++++
 examples/mnist/train.py            | 134 ++++++++++++++++++++++++++++++++
 include/singa/core/tensor.h        |  19 +++++
 include/singa/model/loss.h         |   1 -
 src/core/tensor/math_kernel.cu     |  53 ++++++++++++-
 src/core/tensor/math_kernel.h      |  14 +++-
 src/core/tensor/tensor.cc          |  15 +++-
 src/core/tensor/tensor_math.h      |  24 ++++++
 src/core/tensor/tensor_math_cpp.h  |  42 ++++++++++
 src/core/tensor/tensor_math_cuda.h |  40 +++++++++-
 src/python/singa/optimizer.py      |   7 +-
 src/python/singa/tensor.py         |  20 ++++-
 src/python/swig/core_tensor.i      |  10 +++
 13 files changed, 378 insertions(+), 19 deletions(-)
----------------------------------------------------------------------

[49/51] [abbrv] incubator-singa git commit: Preparing for V1.0 RC0.

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/rat_check
----------------------------------------------------------------------
diff --git a/rat_check b/rat_check
deleted file mode 100644
index 77f0a63..0000000
--- a/rat_check
+++ /dev/null
@@ -1,1108 +0,0 @@
-
-*****************************************************
-Summary
--------
-Generated at: 2016-08-17T13:11:07+08:00
-Notes: 5
-Binaries: 0
-Archives: 0
-Standards: 288
-
-Apache Licensed: 269
-Generated Documents: 0
-
-JavaDocs are generated and so license header is optional
-Generated files do not required license headers
-
-18 Unknown Licenses
-
-*******************************
-
-Unapproved licenses:
-
-  ./.gitmodules
-  ./.travis.yml
-  ./rat_check
-  ./cmake/Protobuf.cmake
-  ./cmake/Thirdparty/FindOpenCL.cmake
-  ./include/singa/utils/cuda_utils.h
-  ./include/singa/utils/tinydir.h
-  ./lib/cnmem/.git
-  ./lib/cnmem/CMakeLists.txt
-  ./lib/cnmem/include/cnmem.h
-  ./lib/cnmem/src/cnmem.cpp
-  ./lib/cnmem/tests/cnmem_tests.cpp
-  ./src/core/tensor/distribution.cl
-  ./src/python/swig/numpy.i
-  ./test/gtest/gtest-all.cc
-  ./test/gtest/gtest.h
-  ./test/gtest/gtest_main.cc
-  ./tool/cpplint.py
-
-*******************************
-
-Archives:
-
-*****************************************************
-  Files with Apache License headers will be marked AL
-  Binary files (which do not require AL headers) will be marked B
-  Compressed archives will be marked A
-  Notices, licenses etc will be marked N
- !????? ./.gitmodules
- !????? ./.travis.yml
-  AL    ./CMakeLists.txt
-  N     ./DISCLAIMER
-  N     ./LICENSE
-  N     ./NOTICE
-  N     ./RELEASE_NOTES
-  AL    ./jenkins.sh
- !????? ./rat_check
-  AL    ./bin/singa-cleanup.sh
-  AL    ./bin/singa-console.sh
-  AL    ./bin/singa-env.sh
-  AL    ./bin/singa-run.sh
-  AL    ./bin/singa-stop.sh
-  AL    ./bin/zk-service.sh
-  AL    ./cmake/Cuda.cmake
-  AL    ./cmake/Dependencies.cmake
- !????? ./cmake/Protobuf.cmake
-  AL    ./cmake/Utils.cmake
-  AL    ./cmake/Templates/singa_config.h.in
-  AL    ./cmake/Thirdparty/FindCBLAS.cmake
-  AL    ./cmake/Thirdparty/FindCUDNN.cmake
-  AL    ./cmake/Thirdparty/FindGlog.cmake
-  AL    ./cmake/Thirdparty/FindLMDB.cmake
- !????? ./cmake/Thirdparty/FindOpenCL.cmake
-  AL    ./examples/CMakeLists.txt
-  AL    ./examples/index.rst
-  AL    ./examples/char-rnn/sample.py
-  AL    ./examples/char-rnn/train.py
-  AL    ./examples/cifar10/CMakeLists.txt
-  AL    ./examples/cifar10/alexnet-parallel.cc
-  AL    ./examples/cifar10/alexnet.cc
-  AL    ./examples/cifar10/alexnet.py
-  AL    ./examples/cifar10/cifar10.h
-  AL    ./examples/cifar10/download_data.py
-  AL    ./examples/cifar10/predict.py
-  AL    ./examples/cifar10/run-parallel.sh
-  AL    ./examples/cifar10/run.sh
-  AL    ./examples/cifar10/train.py
-  AL    ./examples/cifar10/vgg-parallel.cc
-  AL    ./examples/cifar10/vgg.py
-  AL    ./examples/imagenet/CMakeLists.txt
-  AL    ./examples/imagenet/alexnet.cc
-  AL    ./examples/imagenet/create_data.sh
-  AL    ./examples/imagenet/ilsvrc12.cc
-  AL    ./examples/imagenet/ilsvrc12.h
-  AL    ./examples/imagenet/run.sh
-  AL    ./examples/mnist/train.py
-  AL    ./include/singa/core/common.h
-  AL    ./include/singa/core/device.h
-  AL    ./include/singa/core/memory.h
-  AL    ./include/singa/core/scheduler.h
-  AL    ./include/singa/core/tensor.h
-  AL    ./include/singa/io/decoder.h
-  AL    ./include/singa/io/encoder.h
-  AL    ./include/singa/io/integer.h
-  AL    ./include/singa/io/network.h
-  AL    ./include/singa/io/reader.h
-  AL    ./include/singa/io/snapshot.h
-  AL    ./include/singa/io/transformer.h
-  AL    ./include/singa/io/writer.h
-  AL    ./include/singa/model/feed_forward_net.h
-  AL    ./include/singa/model/initializer.h
-  AL    ./include/singa/model/layer.h
-  AL    ./include/singa/model/loss.h
-  AL    ./include/singa/model/metric.h
-  AL    ./include/singa/model/optimizer.h
-  AL    ./include/singa/model/updater.h
-  AL    ./include/singa/utils/channel.h
- !????? ./include/singa/utils/cuda_utils.h
-  AL    ./include/singa/utils/factory.h
-  AL    ./include/singa/utils/integer.h
-  AL    ./include/singa/utils/logging.h
-  AL    ./include/singa/utils/opencl_utils.h
-  AL    ./include/singa/utils/safe_queue.h
-  AL    ./include/singa/utils/singleton.h
-  AL    ./include/singa/utils/string.h
-  AL    ./include/singa/utils/timer.h
- !????? ./include/singa/utils/tinydir.h
- !????? ./lib/cnmem/.git
- !????? ./lib/cnmem/CMakeLists.txt
-  N     ./lib/cnmem/LICENSE
- !????? ./lib/cnmem/include/cnmem.h
- !????? ./lib/cnmem/src/cnmem.cpp
- !????? ./lib/cnmem/tests/cnmem_tests.cpp
-  AL    ./src/CMakeLists.txt
-  AL    ./src/core/device/cpp_cpu.cc
-  AL    ./src/core/device/cuda_gpu.cc
-  AL    ./src/core/device/device.cc
-  AL    ./src/core/device/opencl_device.cc
-  AL    ./src/core/device/platform.cc
-  AL    ./src/core/memory/memory.cc
-  AL    ./src/core/scheduler/scheduler.cc
- !????? ./src/core/tensor/distribution.cl
-  AL    ./src/core/tensor/math_kernel.cu
-  AL    ./src/core/tensor/math_kernel.h
-  AL    ./src/core/tensor/sparse_tensor.cc
-  AL    ./src/core/tensor/tensor.cc
-  AL    ./src/core/tensor/tensor_math.h
-  AL    ./src/core/tensor/tensor_math_cpp.h
-  AL    ./src/core/tensor/tensor_math_cuda.h
-  AL    ./src/core/tensor/tensor_math_opencl.cl
-  AL    ./src/core/tensor/tensor_math_opencl.h
-  AL    ./src/io/binfile_reader.cc
-  AL    ./src/io/binfile_writer.cc
-  AL    ./src/io/csv_decoder.cc
-  AL    ./src/io/csv_encoder.cc
-  AL    ./src/io/image_transformer.cc
-  AL    ./src/io/jpg_decoder.cc
-  AL    ./src/io/jpg_encoder.cc
-  AL    ./src/io/lmdb_reader.cc
-  AL    ./src/io/lmdb_writer.cc
-  AL    ./src/io/snapshot.cc
-  AL    ./src/io/textfile_reader.cc
-  AL    ./src/io/textfile_writer.cc
-  AL    ./src/io/network/endpoint.cc
-  AL    ./src/io/network/message.cc
-  AL    ./src/model/feed_forward_net.cc
-  AL    ./src/model/rnn.cc
-  AL    ./src/model/layer/activation.cc
-  AL    ./src/model/layer/activation.h
-  AL    ./src/model/layer/batchnorm.cc
-  AL    ./src/model/layer/batchnorm.h
-  AL    ./src/model/layer/convolution.cc
-  AL    ./src/model/layer/convolution.h
-  AL    ./src/model/layer/cudnn_activation.cc
-  AL    ./src/model/layer/cudnn_activation.h
-  AL    ./src/model/layer/cudnn_batchnorm.cc
-  AL    ./src/model/layer/cudnn_batchnorm.h
-  AL    ./src/model/layer/cudnn_convolution.cc
-  AL    ./src/model/layer/cudnn_convolution.h
-  AL    ./src/model/layer/cudnn_dropout.cc
-  AL    ./src/model/layer/cudnn_dropout.h
-  AL    ./src/model/layer/cudnn_lrn.cc
-  AL    ./src/model/layer/cudnn_lrn.h
-  AL    ./src/model/layer/cudnn_pooling.cc
-  AL    ./src/model/layer/cudnn_pooling.h
-  AL    ./src/model/layer/cudnn_rnn.cc
-  AL    ./src/model/layer/cudnn_rnn.h
-  AL    ./src/model/layer/cudnn_softmax.cc
-  AL    ./src/model/layer/cudnn_softmax.h
-  AL    ./src/model/layer/cudnn_utils.h
-  AL    ./src/model/layer/dense.cc
-  AL    ./src/model/layer/dense.h
-  AL    ./src/model/layer/dropout.cc
-  AL    ./src/model/layer/dropout.h
-  AL    ./src/model/layer/flatten.cc
-  AL    ./src/model/layer/flatten.h
-  AL    ./src/model/layer/lrn.cc
-  AL    ./src/model/layer/lrn.h
-  AL    ./src/model/layer/pooling.cc
-  AL    ./src/model/layer/pooling.h
-  AL    ./src/model/layer/prelu.cc
-  AL    ./src/model/layer/prelu.h
-  AL    ./src/model/layer/rnn.cc
-  AL    ./src/model/layer/rnn.h
-  AL    ./src/model/layer/softmax.cc
-  AL    ./src/model/layer/softmax.h
-  AL    ./src/model/loss/mse.cc
-  AL    ./src/model/loss/softmax_cross_entropy.cc
-  AL    ./src/model/metric/accuracy.cc
-  AL    ./src/model/optimizer/adagrad.cc
-  AL    ./src/model/optimizer/local_all_reduce.cc
-  AL    ./src/model/optimizer/nesterov.cc
-  AL    ./src/model/optimizer/optimizer.cc
-  AL    ./src/model/optimizer/rmsprop.cc
-  AL    ./src/model/optimizer/sgd.cc
-  AL    ./src/model/updater/local_updater.cc
-  AL    ./src/model/updater/updater.cc
-  AL    ./src/proto/core.proto
-  AL    ./src/proto/io.proto
-  AL    ./src/proto/model.proto
-  AL    ./src/python/setup.py.in
-  AL    ./src/python/singa/__init__.py
-  AL    ./src/python/singa/command.py
-  AL    ./src/python/singa/device.py
-  AL    ./src/python/singa/initializer.py
-  AL    ./src/python/singa/layer.py
-  AL    ./src/python/singa/loss.py
-  AL    ./src/python/singa/metric.py
-  AL    ./src/python/singa/model.py
-  AL    ./src/python/singa/net.py
-  AL    ./src/python/singa/optimizer.py
-  AL    ./src/python/singa/tensor.py
-  AL    ./src/python/singa/utils.py
-  AL    ./src/python/swig/core_device.i
-  AL    ./src/python/swig/core_tensor.i
-  AL    ./src/python/swig/model_layer.i
-  AL    ./src/python/swig/model_loss.i
-  AL    ./src/python/swig/model_metric.i
-  AL    ./src/python/swig/model_optimizer.i
- !????? ./src/python/swig/numpy.i
-  AL    ./src/python/swig/singa.i
-   ./src/python/swig/singa_wrap.cxx
-  AL    ./src/utils/channel.cc
-  AL    ./src/utils/logging.cc
-  AL    ./src/utils/opencl_utils.cc
-  AL    ./test/CMakeLists.txt
-  AL    ./test/gtest/CMakeLists.txt
- !????? ./test/gtest/gtest-all.cc
- !????? ./test/gtest/gtest.h
- !????? ./test/gtest/gtest_main.cc
-  AL    ./test/python/test_layer.py
-  AL    ./test/python/test_optimizer.py
-  AL    ./test/python/test_tensor.py
-  AL    ./test/singa/test_accuracy.cc
-  AL    ./test/singa/test_activation.cc
-  AL    ./test/singa/test_adagrad.cc
-  AL    ./test/singa/test_batchnorm.cc
-  AL    ./test/singa/test_binfile_rw.cc
-  AL    ./test/singa/test_channel.cc
-  AL    ./test/singa/test_convolution.cc
-  AL    ./test/singa/test_cpp_cpu.cc
-  AL    ./test/singa/test_cross_entropy.cc
-  AL    ./test/singa/test_csv.cc
-  AL    ./test/singa/test_cudnn_activation.cc
-  AL    ./test/singa/test_cudnn_batchnorm.cc
-  AL    ./test/singa/test_cudnn_convolution.cc
-  AL    ./test/singa/test_cudnn_dropout.cc
-  AL    ./test/singa/test_cudnn_lrn.cc
-  AL    ./test/singa/test_cudnn_pooling.cc
-  AL    ./test/singa/test_cudnn_rnn.cc
-  AL    ./test/singa/test_cudnn_softmax.cc
-  AL    ./test/singa/test_dense.cc
-  AL    ./test/singa/test_dropout.cc
-  AL    ./test/singa/test_ep.cc
-  AL    ./test/singa/test_flatten.cc
-  AL    ./test/singa/test_image_transformer.cc
-  AL    ./test/singa/test_initializer.cc
-  AL    ./test/singa/test_jpg.cc
-  AL    ./test/singa/test_layer.cc
-  AL    ./test/singa/test_lmdb_rw.cc
-  AL    ./test/singa/test_logging.cc
-  AL    ./test/singa/test_lrn.cc
-  AL    ./test/singa/test_memory.cc
-  AL    ./test/singa/test_mse.cc
-  AL    ./test/singa/test_nesterov.cc
-  AL    ./test/singa/test_opencl.cc
-  AL    ./test/singa/test_platform.cc
-  AL    ./test/singa/test_pooling.cc
-  AL    ./test/singa/test_prelu.cc
-  AL    ./test/singa/test_rmsprop.cc
-  AL    ./test/singa/test_sgd.cc
-  AL    ./test/singa/test_snapshot.cc
-  AL    ./test/singa/test_softmax.cc
-  AL    ./test/singa/test_tensor.cc
-  AL    ./test/singa/test_tensor_math.cc
-  AL    ./test/singa/test_textfile_rw.cc
-  AL    ./test/singa/test_timer.cc
-  AL    ./thirdparty/install.sh
- !????? ./tool/cpplint.py
-  AL    ./tool/graph.py
-  AL    ./tool/node.sh
-  AL    ./tool/docker/mesos/.bashrc
-  AL    ./tool/docker/mesos/Dockerfile
-  AL    ./tool/docker/mesos/core-site.xml
-  AL    ./tool/docker/mesos/hdfs-site.xml
-  AL    ./tool/docker/mesos/install.sh
-  AL    ./tool/docker/mesos/mapred-site.xml
-  AL    ./tool/docker/mesos/yarn-site.xml
-  AL    ./tool/docker/singa/.bashrc
-  AL    ./tool/docker/singa/Dockerfile
-  AL    ./tool/docker/singa/Dockerfile_gpu
-  AL    ./tool/mesos/scheduler.proto
-  AL    ./tool/mesos/singa_scheduler.cc
-  AL    ./tool/python/singa.py
-  AL    ./tool/python/examples/__init__.py
-  AL    ./tool/python/examples/cifar10_cnn.py
-  AL    ./tool/python/examples/cifar10_cnn_cudnn.py
-  AL    ./tool/python/examples/cifar10_cnn_parameter.py
-  AL    ./tool/python/examples/mnist_ae.py
-  AL    ./tool/python/examples/mnist_mlp.py
-  AL    ./tool/python/examples/mnist_mlp_parameter.py
-  AL    ./tool/python/examples/mnist_mlp_test.py
-  AL    ./tool/python/examples/mnist_rbm1.py
-  AL    ./tool/python/examples/mnist_rbm2.py
-  AL    ./tool/python/examples/mnist_rbm3.py
-  AL    ./tool/python/examples/mnist_rbm4.py
-  AL    ./tool/python/examples/train_cifar10.py
-  AL    ./tool/python/examples/train_mnist.py
-  AL    ./tool/python/examples/datasets/__init__.py
-  AL    ./tool/python/examples/datasets/cifar10.py
-  AL    ./tool/python/examples/datasets/mnist.py
-  AL    ./tool/python/singa/__init__.py
-  AL    ./tool/python/singa/driver.i
-  AL    ./tool/python/singa/generatepy.sh
-  AL    ./tool/python/singa/initializations.py
-  AL    ./tool/python/singa/layer.py
-  AL    ./tool/python/singa/model.py
-  AL    ./tool/python/singa/parameter.py
-  AL    ./tool/python/singa/utils/__init__.py
-  AL    ./tool/python/singa/utils/message.py
-  AL    ./tool/python/singa/utils/utility.py
- 
-*****************************************************
- Printing headers for files without AL header...
- 
- 
-=======================================================================
-==./.gitmodules
-=======================================================================
-[submodule "lib/cnmem"]
-	path = lib/cnmem
-	url = https://github.com/NVIDIA/cnmem.git
-
-=======================================================================
-==./.travis.yml
-=======================================================================
-sudo: required
-language: cpp
-compiler: gcc
-dist: trusty
-
-before_install:
- - sudo apt-get -qq update
- - sudo apt-get install -qq -y libopenblas-dev libgoogle-glog-dev libprotobuf-dev protobuf-compiler
- - sudo apt-get install -qq -y opencl-headers ocl-icd-*
- - wget https://github.com/KhronosGroup/OpenCL-CLHPP/releases/download/v2.0.9/cl2.hpp
- - sudo mv cl2.hpp /usr/include/CL/
-#- sudo apt-get install -qq libgtest-dev
-
-before_script:
- - mkdir build && cd build
- - cmake .. -DUSE_CUDA=OFF -DUSE_CUDNN=OFF -DUSE_PYTHON=OFF -DBUILD_OPENCL_TESTS=OFF
-
-script:
- - make
- - ./bin/test_singa --gtest_output=xml:./../gtest.xml
-
-
-=======================================================================
-==./rat_check
-=======================================================================
-
-=======================================================================
-==./cmake/Protobuf.cmake
-=======================================================================
-# This script is taken from
-# https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
-# and modified to our compilation.
-
-function(PROTOBUF_GENERATE_PYTHON OUTPUT)
-    if(NOT ARGN)
-        message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called 
-        without any proto files")
-        return()
-    endif(NOT ARGN)
-
-    set(${OUTPUT})
-    foreach(FIL ${ARGN})
-        get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
-        get_filename_component(FIL_WE ${FIL} NAME_WE)
-        get_filename_component(PATH ${FIL} PATH)
-
-        list(APPEND ${OUTPUT} "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py")
-
-        add_custom_command(
-            OUTPUT "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py"
-            COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
-            ARGS --python_out ${CMAKE_BINARY_DIR}/python/singa/proto
-                 --proto_path ${PATH} ${ABS_FIL}
-            DEPENDS ${ABS_FIL}
-            COMMENT "Running Python protocol buffer compiler on ${FIL}" VERBATIM)
-    endforeach()
-    
-    set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
-    set(${OUTPUT} ${${OUTPUT}} PARENT_SCOPE)
-endfunction()
-
-=======================================================================
-==./cmake/Thirdparty/FindOpenCL.cmake
-=======================================================================
-# This script was taken from https://github.com/elhigu/cmake-findopencl
-# and modified to support finding OpenCL 2.x C++ bindings.
-
-# Find OpenCL
-#
-# To set manually the paths, define these environment variables:
-# OpenCL_INCPATH    - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include)
-# OpenCL_LIBPATH    - Library path (e.h. OpenCL_LIBPATH=/usr/lib64/nvidia)
-#
-# Once done this will define
-#  OPENCL_FOUND            - system has OpenCL
-#  OPENCL_INCLUDE_DIRS     - the OpenCL include directory
-#  OPENCL_LIBRARIES        - link these to use OpenCL
-#  OPENCL_HAS_CPP_BINDINGS - system has also cl2.hpp
-
-FIND_PACKAGE(PackageHandleStandardArgs)
-
-SET (OPENCL_VERSION_STRING "0.1.0")
-SET (OPENCL_VERSION_MAJOR 0)
-SET (OPENCL_VERSION_MINOR 1)
-SET (OPENCL_VERSION_PATCH 0)
-
-IF (APPLE)
-
-	# IF OpenCL_LIBPATH is given use it and don't use default path
-	IF (DEFINED ENV{OpenCL_LIBPATH})
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL PATHS ENV OpenCL_LIBPATH NO_DEFAULT_PATH)
-	ELSE ()
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
-	ENDIF ()
-
-	# IF OpenCL_INCPATH is given use it and find for CL/cl.h and OpenCL/cl.h do not try to find default paths
-	IF (DEFINED ENV{OpenCL_INCPATH})
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h OpenCL/cl.h PATHS ENV OpenCL_INCPATH NO_DEFAULT_PATH)
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl2.hpp OpenCL/cl2.hpp PATHS ${OPENCL_INCLUDE_DIRS} NO_DEFAULT_PATH)
-	ELSE ()
-		FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl2.hpp DOC "Include for OpenCL CPP bindings on OSX")
-	ENDIF ()
-
-ELSE (APPLE)
-
-	IF (WIN32)
-
-		# Find OpenCL includes and libraries from environment variables provided by vendor
-		SET(OPENCL_INCLUDE_SEARCH_PATHS)
-		SET(OPENCL_LIBRARY_SEARCH_PATHS)
-		SET(OPENCL_LIBRARY_64_SEARCH_PATHS)
-
-		# Nvidia
-
-=======================================================================
-==./include/singa/utils/cuda_utils.h
-=======================================================================
-// from caffe include/caffe/util/device_alternative.hpp
-#ifndef SINGA_UTILS_CUDA_UTILS_H_
-#define SINGA_UTILS_CUDA_UTILS_H_
-
-#include "singa/singa_config.h"
-#ifdef USE_CUDA
-#include <cublas_v2.h>
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <curand.h>
-
-inline const char* cublasGetErrorString(cublasStatus_t error) {
-  switch (error) {
-  case CUBLAS_STATUS_SUCCESS:
-    return "CUBLAS_STATUS_SUCCESS";
-  case CUBLAS_STATUS_NOT_INITIALIZED:
-    return "CUBLAS_STATUS_NOT_INITIALIZED";
-  case CUBLAS_STATUS_ALLOC_FAILED:
-    return "CUBLAS_STATUS_ALLOC_FAILED";
-  case CUBLAS_STATUS_INVALID_VALUE:
-    return "CUBLAS_STATUS_INVALID_VALUE";
-  case CUBLAS_STATUS_ARCH_MISMATCH:
-    return "CUBLAS_STATUS_ARCH_MISMATCH";
-  case CUBLAS_STATUS_MAPPING_ERROR:
-    return "CUBLAS_STATUS_MAPPING_ERROR";
-  case CUBLAS_STATUS_EXECUTION_FAILED:
-    return "CUBLAS_STATUS_EXECUTION_FAILED";
-  case CUBLAS_STATUS_INTERNAL_ERROR:
-    return "CUBLAS_STATUS_INTERNAL_ERROR";
-#if CUDA_VERSION >= 6000
-  case CUBLAS_STATUS_NOT_SUPPORTED:
-    return "CUBLAS_STATUS_NOT_SUPPORTED";
-#endif
-#if CUDA_VERSION >= 6050
-  case CUBLAS_STATUS_LICENSE_ERROR:
-    return "CUBLAS_STATUS_LICENSE_ERROR";
-#endif
-  }
-  return "Unknown cublas status";
-}
-
-inline const char* curandGetErrorString(curandStatus_t error) {
-  switch (error) {
-  case CURAND_STATUS_SUCCESS:
-    return "CURAND_STATUS_SUCCESS";
-  case CURAND_STATUS_VERSION_MISMATCH:
-    return "CURAND_STATUS_VERSION_MISMATCH";
-  case CURAND_STATUS_NOT_INITIALIZED:
-    return "CURAND_STATUS_NOT_INITIALIZED";
-  case CURAND_STATUS_ALLOCATION_FAILED:
-
-=======================================================================
-==./include/singa/utils/tinydir.h
-=======================================================================
-/*
-Copyright (c) 2013-2014, Cong Xu, Baudouin Feildel
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef TINYDIR_H
-#define TINYDIR_H
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#ifdef _WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#ifdef _MSC_VER
-#pragma warning (disable : 4996)
-#endif
-#else
-#include <dirent.h>
-#include <libgen.h>
-#include <sys/stat.h>
-#endif
-
-
-/* types */
-
-#define _TINYDIR_PATH_MAX 4096
-#ifdef _WIN32
-/* extra chars for the "\\*" mask */
-#define _TINYDIR_PATH_EXTRA 2
-#else
-
-=======================================================================
-==./lib/cnmem/.git
-=======================================================================
-gitdir: ../../.git/modules/lib/cnmem
-
-=======================================================================
-==./lib/cnmem/CMakeLists.txt
-=======================================================================
-# CMakeLists to build the cnmem library.
-cmake_minimum_required(VERSION 2.8.8)
-project(cnmem)
-
-# We need CUDA to build that library.
-find_package(CUDA QUIET REQUIRED)
-include_directories(${CUDA_INCLUDE_DIRS})
-
-# Rules to build the cnmem library.
-include_directories(include)
-add_definitions(-DCNMEM_DLLEXPORT)
-add_library(cnmem SHARED src/cnmem.cpp)
-set_target_properties(cnmem PROPERTIES VERSION 1.0.0 SOVERSION 1)
-target_link_libraries(cnmem LINK_PUBLIC ${CUDA_LIBRARIES})
-install(TARGETS cnmem RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib)
-install(FILES include/cnmem.h DESTINATION include)
-
-# Add the tests.
-if(WITH_TESTS)
-
-  # Get Google tests.
-  find_package(GTest QUIET REQUIRED)
-  include_directories(${GTEST_INCLUDE_DIRS})
-  
-  # Build the executable.
-  add_executable(cnmem_tests tests/cnmem_tests.cpp)
-  if(MSVC)
-    if(MSVC_VERSION GREATER 1700) # Visual Studio 11 or more.
-      add_definitions(-DUSE_CPP_11)
-    endif(MSVC_VERSION GREATER 1700)
-  endif(MSVC)
-  if(CMAKE_COMPILER_IS_GNUCC)
-    add_definitions(-std=c++11 -DUSE_CPP_11)
-  endif(CMAKE_COMPILER_IS_GNUCC)
-  target_link_libraries(cnmem_tests LINK_PUBLIC cnmem ${CUDA_LIBRARIES} ${GTEST_LIBRARIES} -lpthread)
-  install(TARGETS cnmem_tests RUNTIME DESTINATION bin)
-  
-  # On Windows, we copy the Google test DLL to the bin folder.
-  if(MSVC)
-    get_filename_component(gtest_dll_path ${GTEST_LIBRARIES} DIRECTORY)
-    install(FILES ${gtest_dll_path}/gtest.dll DESTINATION bin)
-  endif(MSVC)
-
-endif(WITH_TESTS)
-
-
-=======================================================================
-==./lib/cnmem/include/cnmem.h
-=======================================================================
-/* ********************************************************************** 
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of NVIDIA CORPORATION nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * ********************************************************************** */
-#pragma once
-
-#ifdef __cplusplus
-#include "cstdio"
-#else
-#include "stdio.h"
-#endif
-#include "cuda_runtime_api.h"
-
-#if defined(_MSC_VER) || defined(WIN32)
-#ifdef CNMEM_DLLEXPORT
-#define CNMEM_API __declspec(dllexport)
-#else
-#define CNMEM_API __declspec(dllimport)
-#endif
-#else
-#ifdef CNMEM_DLLEXPORT
-#define CNMEM_API __attribute__((visibility ("default")))
-#else
-#define CNMEM_API
-#endif
-#endif
-
-
-=======================================================================
-==./lib/cnmem/src/cnmem.cpp
-=======================================================================
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include "cnmem.h"
-#include <cstddef>
-#include <vector>
-#include <cuda_runtime_api.h>
-
-#if !defined(WIN32) && defined(_MSC_VER)
-#define WIN32
-#endif
-
-#ifdef WIN32
-#include <Windows.h>
-#else
-#include <pthread.h>
-#endif
-
-#if defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4 // ARMv7 is the only 32-bit target that we support.
-#define CNMEM_BUILD_WITH_32_BIT_POINTERS
-#endif
-
-#define CNMEM_GRANULARITY 512
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-=======================================================================
-==./lib/cnmem/tests/cnmem_tests.cpp
-=======================================================================
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <gtest/gtest.h>
-#include <cnmem.h>
-#include <fstream>
-#ifdef USE_CPP_11
-#include <thread>
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-static std::size_t getFreeMemory() {
-    cudaFree(0);
-    std::size_t freeMem, totalMem;
-    cudaMemGetInfo(&freeMem, &totalMem);
-    return freeMem;
-}
-
-class CnmemTest : public ::testing::Test {
-    /// We determine the amount of free memory.
-    std::size_t mFreeMem;
-    
-protected:
-    /// Do we test memory leaks.
-
-=======================================================================
-==./src/core/tensor/distribution.cl
-=======================================================================
-// This code is adapted from https://github.com/amd/OpenCL-caffe/blob/stable/src/caffe/ocl/random.cl
-
-//Note: random generator has two parts
-//first part: the open sourced threefy random generator kernel from DE Shaw Research
-//second part. we wrap the kernel up to generate uniform, bernoulli and gaussion distribution generators.
-
-//begin: the open sourced random generator from DE Shaw Research
-//https://www.deshawresearch.com/resources_random123.html
-typedef uint uint32_t;
-
-struct r123array4x32 {
-  uint32_t v[4];
-};
-
-enum r123_enum_threefry32x4 {
-  R_32x4_0_0 = 10,
-  R_32x4_0_1 = 26,
-  R_32x4_1_0 = 11,
-  R_32x4_1_1 = 21,
-  R_32x4_2_0 = 13,
-  R_32x4_2_1 = 27,
-  R_32x4_3_0 = 23,
-  R_32x4_3_1 = 5,
-  R_32x4_4_0 = 6,
-  R_32x4_4_1 = 20,
-  R_32x4_5_0 = 17,
-  R_32x4_5_1 = 11,
-  R_32x4_6_0 = 25,
-  R_32x4_6_1 = 10,
-  R_32x4_7_0 = 18,
-  R_32x4_7_1 = 20
-};
-
-inline uint32_t RotL_32(uint32_t x, unsigned int N) {
-  return (x << (N & 31)) | (x >> ((32 - N) & 31));
-}
-
-typedef struct r123array4x32 threefry4x32_ctr_t;
-typedef struct r123array4x32 threefry4x32_key_t;
-typedef struct r123array4x32 threefry4x32_ukey_t;
-
-inline threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, threefry4x32_ctr_t in, threefry4x32_key_t k) {
-  threefry4x32_ctr_t X;
-  uint32_t ks[4 + 1];
-  int i;
-  ks[4] = 0x1BD11BDA;
-
-  {
-    ks[0] = k.v[0];
-    X.v[0] = in.v[0];
-
-=======================================================================
-==./src/python/swig/numpy.i
-=======================================================================
-/* -*- C -*-  (not really, but good for syntax highlighting) */
-
-/*
- * Copyright (c) 2005-2015, NumPy Developers.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- *        notice, this list of conditions and the following disclaimer.
- *
- *     * Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials provided
- *        with the distribution.
- *
- *     * Neither the name of the NumPy Developers nor the names of any
- *        contributors may be used to endorse or promote products derived
- *        from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef SWIGPYTHON
-
-%{
-#ifndef SWIG_FILE_WITH_INIT
-#define NO_IMPORT_ARRAY
-#endif
-#include "stdio.h"
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
-#include <numpy/arrayobject.h>
-%}
-
-/**********************************************************************/
-
-%fragment("NumPy_Backward_Compatibility", "header")
-{
-
-=======================================================================
-==./test/gtest/gtest-all.cc
-=======================================================================
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: mheule@google.com (Markus Heule)
-//
-// Google C++ Testing Framework (Google Test)
-//
-// Sometimes it's desirable to build Google Test by compiling a single file.
-// This file serves this purpose.
-
-// This line ensures that gtest.h can be compiled on its own, even
-// when it's fused.
-#include "gtest/gtest.h"
-
-// The following lines pull in the real gtest *.cc files.
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-
-=======================================================================
-==./test/gtest/gtest.h
-=======================================================================
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the public API for Google Test.  It should be
-// included by any test program that uses Google Test.
-//
-// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
-// leave some internal implementation details in this header file.
-// They are clearly marked by comments like this:
-//
-//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-//
-// Such code is NOT meant to be used by a user directly, and is subject
-// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
-// program!
-//
-// Acknowledgment: Google Test borrowed the idea of automatic test
-// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
-// easyUnit framework.
-
-
-=======================================================================
-==./test/gtest/gtest_main.cc
-=======================================================================
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdio.h>
-
-#include "gtest/gtest.h"
-
-GTEST_API_ int main(int argc, char **argv) {
-  printf("Running main() from gtest_main.cc\n");
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-
-=======================================================================
-==./tool/cpplint.py
-=======================================================================
-#!/usr/bin/env python
-#
-# Copyright (c) 2009 Google Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#    * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-#    * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""Does google-lint on c++ files.
-
-The goal of this script is to identify places in the code that *may*
-be in non-compliance with google style.  It does not attempt to fix
-up these problems -- the point is to educate.  It does also not
-attempt to find all problems, or to ensure that everything it does
-find is legitimately a problem.
-
-In particular, we can get very confused by /* and // inside strings!
-We do a small hack, which is to ignore //'s with "'s after them on the
-same line, but it is far from perfect (in either direction).
-"""
-
-import codecs
-import copy
-import getopt
-import math  # for log
-import os
-import re
-import sre_compile

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 174f05e..cc1ee0c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -6,19 +6,19 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# 
+#
 
-# generate protobuf sources 
+# generate protobuf sources
 
-FILE(GLOB proto_files proto/*.proto) 
+FILE(GLOB proto_files proto/*.proto)
 protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files})
 IF (USE_PYTHON)
     protobuf_generate_python(proto_pys ${proto_files})
@@ -111,9 +111,9 @@ IF(USE_PYTHON)
     file(GLOB_RECURSE python_source_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.py)
 
     create_symlinks(${python_source_files})
-    
+
     SET(python_cxxs "${core_source};${io_source};${model_source};${utils_source}")
-    ADD_LIBRARY(_singa_wrap SHARED "${python_srcs} ${python_cxxs} ${cuda_objs}")
+    ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${python_cxxs} ${cuda_objs})
     SET(WRAPPER_LINKER_LIBS "${PREVIOUS_LINKER_LIBS}")
     TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS})
     TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/README.md
----------------------------------------------------------------------
diff --git a/tool/python/README.md b/tool/python/README.md
deleted file mode 100644
index 8c90cfe..0000000
--- a/tool/python/README.md
+++ /dev/null
@@ -1,375 +0,0 @@
-# Python Binding
-
----
-
-Python binding provides APIs for configuring a training job following
-[keras](http://keras.io/), including the configuration of neural net, training
-algorithm, etc.  It replaces the configuration file (e.g., *job.conf*) in
-protobuf format, which is typically long and error-prone to prepare. In later
-version, we will add python functions to interact with the layer and neural net
-objects, which would enable users to train and debug their models
-interactively.
-
-Here is the layout of python related code,
-
-    SINGAROOT/tool/python
-    |-- pb2 (has job_pb2.py)
-    |-- singa
-        |-- model.py
-        |-- layer.py
-        |-- parameter.py
-        |-- initialization.py
-        |-- utils
-            |-- utility.py
-            |-- message.py
-    |-- examples
-        |-- cifar10_cnn.py, mnist_mlp.py, , mnist_rbm1.py, mnist_ae.py, etc.
-        |-- datasets
-            |-- cifar10.py
-            |-- mnist.py
-
-## Compiling and running instructions
-
-In order to use the Python APIs, users need to add the following arguments when compiling
-SINGA,
-
-    ./configure --enable-python --with-python=PYTHON_DIR
-    make
-
-where PYTHON_DIR has Python.h
-
-
-The training program is launched by
-
-    bin/singa-run.sh -exec <user_main.py>
-
-where user_main.py creates the JobProto object and passes it to Driver::Train to
-start the training.
-
-For example,
-
-    cd SINGAROOT
-    bin/singa-run.sh -exec tool/python/examples/cifar10_cnn.py
-
-
-
-## Examples
-
-
-### MLP Example
-
-This example uses python APIs to configure and train a MLP model over the MNIST
-dataset. The configuration content is the same as that written in *SINGAROOT/examples/mnist/job.conf*.
-
-```
-X_train, X_test, workspace = mnist.load_data()
-
-m = Sequential('mlp', sys.argv)
-
-m.add(Dense(2500, init='uniform', activation='tanh'))
-m.add(Dense(2000, init='uniform', activation='tanh'))
-m.add(Dense(1500, init='uniform', activation='tanh'))
-m.add(Dense(1000, init='uniform', activation='tanh'))
-m.add(Dense(500,  init='uniform', activation='tanh'))
-m.add(Dense(10, init='uniform', activation='softmax'))
-
-sgd = SGD(lr=0.001, lr_type='step')
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-m.fit(X_train, nb_epoch=1000, with_test=True)
-result = m.evaluate(X_test, batch_size=100, test_steps=10, test_freq=60)
-```
-
-### CNN Example
-
-This example uses python APIs to configure and train a CNN model over the Cifar10
-dataset. The configuration content is the same as that written in *SINGAROOT/examples/cifar10/job.conf*.
-
-
-```
-X_train, X_test, workspace = cifar10.load_data()
-
-m = Sequential('cnn', sys.argv)
-
-m.add(Convolution2D(32, 5, 1, 2, w_std=0.0001, b_lr=2))
-m.add(MaxPooling2D(pool_size=(3,3), stride=2))
-m.add(Activation('relu'))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(32, 5, 1, 2, b_lr=2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(64, 5, 1, 2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-
-m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax'))
-
-sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
-topo = Cluster(workspace)
-m.compile(updater=sgd, cluster=topo)
-m.fit(X_train, nb_epoch=1000, with_test=True)
-result = m.evaluate(X_test, 1000, test_steps=30, test_freq=300)
-```
-
-
-### RBM Example
-
-This example uses python APIs to configure and train a RBM model over the MNIST
-dataset. The configuration content is the same as that written in *SINGAROOT/examples/rbm*.conf*.
-
-```
-rbmid = 3
-X_train, X_test, workspace = mnist.load_data(nb_rbm=rbmid)
-m = Energy('rbm'+str(rbmid), sys.argv)
-
-out_dim = [1000, 500, 250]
-m.add(RBM(out_dim, w_std=0.1, b_wd=0))
-
-sgd = SGD(lr=0.1, decay=0.0002, momentum=0.8)
-topo = Cluster(workspace)
-m.compile(optimizer=sgd, cluster=topo)
-m.fit(X_train, alg='cd', nb_epoch=6000)
-```
-
-### AutoEncoder Example
-This example uses python APIs to configure and train an autoencoder model over
-the MNIST dataset. The configuration content is the same as that written in
-*SINGAROOT/examples/autoencoder.conf*.
-
-
-```
-rbmid = 4
-X_train, X_test, workspace = mnist.load_data(nb_rbm=rbmid+1)
-m = Sequential('autoencoder', sys.argv)
-
-hid_dim = [1000, 500, 250, 30]
-m.add(Autoencoder(hid_dim, out_dim=784, activation='sigmoid', param_share=True))
-
-agd = AdaGrad(lr=0.01)
-topo = Cluster(workspace)
-m.compile(loss='mean_squared_error', optimizer=agd, cluster=topo)
-m.fit(X_train, alg='bp', nb_epoch=12200)
-```
-
-### To run SINGA on GPU
-
-Users need to set a list of gpu ids to `device` field in fit() or evaluate().
-The number of GPUs must be the same to the number of workers configured for
-cluster topology.
-
-
-```
-gpu_id = [0]
-m.fit(X_train, nb_epoch=100, with_test=True, device=gpu_id)
-```
-
-### TIPS
-
-Hidden layers for MLP can be configured as
-
-```
-for n in [2500, 2000, 1500, 1000, 500]:
-  m.add(Dense(n, init='uniform', activation='tanh'))
-m.add(Dense(10, init='uniform', activation='softmax'))
-```
-
-Activation layer can be specified separately
-
-```
-m.add(Dense(2500, init='uniform'))
-m.add(Activation('tanh'))
-```
-
-Users can explicitly specify hyper-parameters of weight and bias
-
-```
-par = Parameter(init='uniform', scale=0.05)
-m.add(Dense(2500, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(2000, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(1500, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(1000, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(500, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(10, w_param=par, b_param=par, activation='softmax'))
-```
-
-
-```
-parw = Parameter(init='gauss', std=0.0001)
-parb = Parameter(init='const', value=0)
-m.add(Convolution(32, 5, 1, 2, w_param=parw, b_param=parb, b_lr=2))
-m.add(MaxPooling2D(pool_size(3,3), stride=2))
-m.add(Activation('relu'))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-parw.update(std=0.01)
-m.add(Convolution(32, 5, 1, 2, w_param=parw, b_param=parb))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size(3,3), stride=2))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution(64, 5, 1, 2, w_param=parw, b_param=parb, b_lr=1))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size(3,3), stride=2))
-
-m.add(Dense(10, w_param=parw, w_wd=250, b_param=parb, b_lr=2, b_wd=0, activation='softmax'))
-```
-
-
-Data can be added in this way,
-
-```
-X_train, X_test = mnist.load_data()  // parameter values are set in load_data()
-m.fit(X_train, ...)                  // Data layer for training is added
-m.evaluate(X_test, ...)              // Data layer for testing is added
-```
-or this way,
-
-```
-X_train, X_test = mnist.load_data()  // parameter values are set in load_data()
-m.add(X_train)                       // explicitly add Data layer
-m.add(X_test)                        // explicitly add Data layer
-```
-
-
-```
-store = Store(path='train.bin', batch_size=64, ...)        // parameter values are set explicitly
-m.add(Data(load='recordinput', phase='train', conf=store)) // Data layer is added
-store = Store(path='test.bin', batch_size=100, ...)        // parameter values are set explicitly
-m.add(Data(load='recordinput', phase='test', conf=store))  // Data layer is added
-```
-
-
-### Cases to run SINGA
-
-(1) Run SINGA for training
-
-```
-m.fit(X_train, nb_epoch=1000)
-```
-
-(2) Run SINGA for training and validation
-
-```
-m.fit(X_train, validate_data=X_valid, nb_epoch=1000)
-```
-
-(3) Run SINGA for test while training
-
-```
-m.fit(X_train, nb_epoch=1000, with_test=True)
-result = m.evaluate(X_test, batch_size=100, test_steps=100)
-```
-
-(4) Run SINGA for test only
-Assume a checkpoint exists after training
-
-```
-result = m.evaluate(X_test, batch_size=100, checkpoint_path=workspace+'/checkpoint/step100-worker0')
-```
-
-
-## Implementation Details
-
-### Layer class (inherited)
-
-* Data
-* Dense
-* Activation
-* Convolution2D
-* MaxPooling2D
-* AvgPooling2D
-* LRN2D
-* Dropout
-* RBM
-* Autoencoder
-
-### Model class
-
-Model class has `jobconf` (JobProto) and `layers` (layer list)
-
-Methods in Model class
-
-* add
-	* add Layer into Model
-	* 2 subclasses: Sequential model and Energy model
-
-* compile
-	* set Updater (i.e., optimizer) and Cluster (i.e., topology) components
-
-* fit
-	* set Training data and parameter values for the training
-		* (optional) set Validatiaon data and parameter values
-	* set Train_one_batch component
-	* specify `with_test` field if a user wants to run SINGA with test data simultaneously.
-	* [TODO] recieve train/validation results, e.g., accuracy, loss, ppl, etc.
-
-* evaluate
-	* set Testing data and parameter values for the testing
-	* specify `checkpoint_path` field if a user want to run SINGA only for testing.
-	* [TODO] recieve test results, e.g., accuracy, loss, ppl, etc.
-
-### Results
-
-fit() and evaluate() return train/test results, a dictionary containing
-
-* [key]: step number
-* [value]: a list of dictionay
-	* 'acc' for accuracy
-	* 'loss' for loss
-	* 'ppl' for ppl
-	* 'se' for squred error
-
-
-### Parameter class
-
-Users need to set parameter and initial values. For example,
-
-* Parameter (fields in Param proto)
-	* lr = (float) // learning rate multiplier, used to scale the learning rate when updating parameters.
-	* wd = (float) // weight decay multiplier, used to scale the weight decay when updating parameters.
-
-* Parameter initialization (fields in ParamGen proto)
-	* init = (string) // one of the types, 'uniform', 'constant', 'gaussian'
-	* high = (float)  // for 'uniform'
-	* low = (float)   // for 'uniform'
-	* value = (float) // for 'constant'
-	* mean = (float)  // for 'gaussian'
-	* std = (float)   // for 'gaussian'
-
-* Weight (`w_param`) is 'gaussian' with mean=0, std=0.01 at default
-
-* Bias (`b_param`) is 'constant' with value=0 at default
-
-* How to update the parameter fields
-	* for updating Weight, put `w_` in front of field name
-	* for updating Bias, put `b_` in front of field name
-
-Several ways to set Parameter values
-
-```
-parw = Parameter(lr=2, wd=10, init='gaussian', std=0.1)
-parb = Parameter(lr=1, wd=0, init='constant', value=0)
-m.add(Convolution2D(10, w_param=parw, b_param=parb, ...)
-```
-
-```
-m.add(Dense(10, w_mean=1, w_std=0.1, w_lr=2, w_wd=10, ...)
-```
-
-```
-parw = Parameter(init='constant', mean=0)
-m.add(Dense(10, w_param=parw, w_lr=1, w_wd=1, b_value=1, ...)
-```
-
-### Other classes
-
-* Store
-* Algorithm
-* Updater
-* SGD
-* AdaGrad
-* Cluster
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/__init__.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/__init__.py b/tool/python/examples/__init__.py
deleted file mode 100644
index a796a7a..0000000
--- a/tool/python/examples/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/cifar10_cnn.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn.py b/tool/python/examples/cifar10_cnn.py
deleted file mode 100755
index 8d4e778..0000000
--- a/tool/python/examples/cifar10_cnn.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import cifar10
-
-X_train, X_test, workspace = cifar10.load_data()
-
-m = Sequential('cifar10-cnn', sys.argv)
-
-m.add(Convolution2D(32, 5, 1, 2, w_std=0.0001, b_lr=2))
-m.add(MaxPooling2D(pool_size=(3,3), stride=2))
-m.add(Activation('relu'))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(32, 5, 1, 2, b_lr=2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(64, 5, 1, 2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-
-m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax'))
-
-sgd = SGD(decay=0.004, momentum=0.9, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-m.fit(X_train, nb_epoch=1000, with_test=True)
-result = m.evaluate(X_test, test_steps=100, test_freq=300)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/cifar10_cnn_cudnn.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn_cudnn.py b/tool/python/examples/cifar10_cnn_cudnn.py
deleted file mode 100755
index e243834..0000000
--- a/tool/python/examples/cifar10_cnn_cudnn.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import cifar10
-
-X_train, X_test, workspace = cifar10.load_data()
-
-m = Sequential('cifar10-cnn', sys.argv)
-
-m.add(Convolution2D(32, 5, 1, 2, w_std=0.0001, b_lr=2))
-m.add(MaxPooling2D(pool_size=(3,3), stride=2))
-m.add(Activation('relu'))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(32, 5, 1, 2, b_lr=2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(64, 5, 1, 2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-
-m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax'))
-
-sgd = SGD(decay=0.004, momentum=0.9, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-
-gpu_id = [0]
-m.fit(X_train, nb_epoch=7000, with_test=True, device=gpu_id)
-result = m.evaluate(X_test, test_steps=100, test_freq=1000)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/cifar10_cnn_parameter.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn_parameter.py b/tool/python/examples/cifar10_cnn_parameter.py
deleted file mode 100755
index c5470b6..0000000
--- a/tool/python/examples/cifar10_cnn_parameter.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import cifar10
-
-X_train, X_test, workspace = cifar10.load_data()
-
-m = Sequential('cifar10-cnn', sys.argv)
-
-parw = Parameter(init='gaussian', std=0.0001)
-parb = Parameter(init='constant')
-m.add(Convolution2D(32, 5, 1, 2, w_param=parw, b_param=parb, b_lr=2))
-m.add(MaxPooling2D(pool_size=(3,3), stride=2))
-m.add(Activation('relu'))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-parw.update(std=0.01)
-m.add(Convolution2D(32, 5, 1, 2, w_param=parw, b_param=parb))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(64, 5, 1, 2, w_param=parw, b_param=parb, b_lr=1))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-
-m.add(Dense(10, w_param=parw, w_wd=250, b_param=parb, b_lr=2, b_wd=0, activation='softmax'))
-
-sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-m.fit(X_train, nb_epoch=100, with_test=True)
-result = m.evaluate(X_test, test_steps=10, test_freq=300)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/datasets/__init__.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/datasets/__init__.py b/tool/python/examples/datasets/__init__.py
deleted file mode 100644
index a796a7a..0000000
--- a/tool/python/examples/datasets/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/datasets/cifar10.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/datasets/cifar10.py b/tool/python/examples/datasets/cifar10.py
deleted file mode 100644
index ef5136f..0000000
--- a/tool/python/examples/datasets/cifar10.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-from singa.model import *
-
-def load_data(
-         workspace = None,
-         backend = 'kvfile',
-         batchsize = 64,
-         random = 5000,
-         shape = (3, 32, 32),
-         std = 127.5,
-         mean = 127.5
-      ):
-
-  # using cifar10 dataset
-  data_dir = 'examples/cifar10'
-  path_train = data_dir + '/train_data.bin'
-  path_test  = data_dir + '/test_data.bin'
-  path_mean  = data_dir + '/image_mean.bin'
-  if workspace == None: workspace = data_dir
-
-  store = Store(path=path_train, mean_file=path_mean, backend=backend,
-              random_skip=random, batchsize=batchsize,
-              shape=shape)
-
-  data_train = Data(load='recordinput', phase='train', conf=store)
-
-  store = Store(path=path_test, mean_file=path_mean, backend=backend,
-              batchsize=batchsize,
-              shape=shape)
-
-  data_test = Data(load='recordinput', phase='test', conf=store)
-
-  return data_train, data_test, workspace
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/datasets/mnist.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/datasets/mnist.py b/tool/python/examples/datasets/mnist.py
deleted file mode 100644
index 0f75393..0000000
--- a/tool/python/examples/datasets/mnist.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-from singa.model import *
-
-def load_data(
-     workspace = None,
-     backend = 'kvfile',
-     nb_rbm = 0,  # the number of layers for RBM and Autoencoder
-     checkpoint_steps = 0,
-     **pvalues
-   ):
-
-  # using mnist dataset
-  data_dir = 'examples/mnist'
-  path_train = data_dir + '/train_data.bin'
-  path_test  = data_dir + '/test_data.bin'
-  if workspace == None: workspace = data_dir
-
-  # checkpoint path to load
-  checkpoint_list = None
-  if checkpoint_steps > 0:
-    workerid = 0
-    checkpoint_list = []
-    for i in range(nb_rbm-1, 0, -1):
-      checkpoint_list.append('examples/rbm/rbm{0}/checkpoint/step{1}-worker{2}'.format(str(i),checkpoint_steps,workerid))
-
-  store = Store(path=path_train, backend=backend, **pvalues)
-  data_train = Data(load='recordinput', phase='train', conf=store, checkpoint=checkpoint_list)
-
-  store = Store(path=path_test, backend=backend, **pvalues)
-  data_test = Data(load='recordinput', phase='test', conf=store)
-
-  return data_train, data_test, workspace

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_ae.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_ae.py b/tool/python/examples/mnist_ae.py
deleted file mode 100755
index 888f288..0000000
--- a/tool/python/examples/mnist_ae.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-# Sample parameter values for Autoencoder example
-rbmid = 4
-pvalues = {'batchsize' : 100, 'shape' : 784, 'std_value' : 255}
-X_train, X_test, workspace = mnist.load_data(
-            workspace = 'examples/rbm/autoencoder',
-            nb_rbm = rbmid+1,
-            checkpoint_steps = 6000,
-            **pvalues)
-
-m = Sequential('autoencoder', sys.argv)
-
-hid_dim = [1000, 500, 250, 30]
-m.add(Autoencoder(hid_dim, out_dim=784, activation='sigmoid', param_share=True))
-
-agd = AdaGrad(lr=0.01)
-topo = Cluster(workspace)
-m.compile(loss='mean_squared_error', optimizer=agd, cluster=topo)
-m.fit(X_train, alg='bp', nb_epoch=12200, with_test=True)
-result = m.evaluate(X_test, test_steps=100, test_freq=1000)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_mlp.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_mlp.py b/tool/python/examples/mnist_mlp.py
deleted file mode 100755
index 10cd15e..0000000
--- a/tool/python/examples/mnist_mlp.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-# Sample parameter values for Mnist MLP example
-pvalues = {'batchsize' : 64, 'shape' : 784, 'random_skip' : 5000,
-           'std_value' : 127.5, 'mean_value' : 127.5}
-X_train, X_test, workspace = mnist.load_data(**pvalues)
-
-m = Sequential('mlp', argv=sys.argv)
-
-''' Weight and Bias are initialized by
-    uniform distribution with scale=0.05 at default
-'''
-m.add(Dense(2500, init='uniform', activation='tanh'))
-m.add(Dense(2000, init='uniform', activation='tanh'))
-m.add(Dense(1500, init='uniform', activation='tanh'))
-m.add(Dense(1000, init='uniform', activation='tanh'))
-m.add(Dense(500,  init='uniform', activation='tanh'))
-m.add(Dense(10, init='uniform', activation='softmax'))
-
-sgd = SGD(lr=0.001, lr_type='step')
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-
-m.fit(X_train, nb_epoch=100, with_test=True)
-result = m.evaluate(X_test, batch_size=100, test_steps=10)
-
-#e.g., display result
-#for k, v in sorted(result.items(), key=lambda x: x[0]):
-#  print k, v

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_mlp_parameter.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_mlp_parameter.py b/tool/python/examples/mnist_mlp_parameter.py
deleted file mode 100755
index 9080451..0000000
--- a/tool/python/examples/mnist_mlp_parameter.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from singa.datasets import mnist
-
-# Sample parameter values for Mnist MLP example
-pvalues = {'batchsize' : 64, 'shape' : 784,
-           'random_skip' : 5000,
-           'std_value' : 127.5, 'mean_value' : 127.5}
-X_train, X_test, workspace = mnist.load_data(**pvalues)
-
-m = Sequential('mlp', argv=sys.argv)
-
-par = Parameter(init='uniform', scale=0.05)
-m.add(Dense(2500, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(2000, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(1500, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(1000, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(500, w_param=par, b_param=par, activation='tanh'))
-m.add(Dense(10, w_param=par, b_param=par, activation='softmax'))
-
-sgd = SGD(lr=0.001, lr_type='step')
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-
-m.fit(X_train, nb_epoch=100, with_test=True)
-result = m.evaluate(X_test, batch_size=100, test_steps=10)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_mlp_test.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_mlp_test.py b/tool/python/examples/mnist_mlp_test.py
deleted file mode 100755
index ee4e4aa..0000000
--- a/tool/python/examples/mnist_mlp_test.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-# Sample parameter values for Mnist MLP example
-pvalues = {'batchsize' : 64, 'shape' : 784,
-           'std_value' : 127.5, 'mean_value' : 127.5}
-X_train, X_test, workspace = mnist.load_data(**pvalues)
-
-m = Sequential('mlp', argv=sys.argv)
-
-m.add(Dense(2500, init='uniform', activation='tanh'))
-m.add(Dense(2000, init='uniform', activation='tanh'))
-m.add(Dense(1500, init='uniform', activation='tanh'))
-m.add(Dense(1000, init='uniform', activation='tanh'))
-m.add(Dense(500,  init='uniform', activation='tanh'))
-m.add(Dense(10, init='uniform', activation='softmax'))
-
-sgd = SGD(lr=0.001, lr_type='step')
-topo = Cluster(workspace)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-
-''' For doing test only, normally users sets checkpoint path
-    e.g., assume that checkpoint exists by
-          m.fit(X_train, nb_epoch=100, checkpoint_freq=100)
-'''
-path = workspace+'/checkpoint/step100-worker0'
-result = m.evaluate(X_test, batch_size=100, test_steps=100, checkpoint_path=path)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_rbm1.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_rbm1.py b/tool/python/examples/mnist_rbm1.py
deleted file mode 100755
index 5f22d52..0000000
--- a/tool/python/examples/mnist_rbm1.py
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-rbmid = 1
-pvalues = {'batchsize' : 100, 'shape' : 784, 'std_value' : 255}
-X_train, X_test, workspace = mnist.load_data(
-            workspace = 'examples/rbm/rbm1',
-            nb_rbm = rbmid,
-            checkpoint_steps = 6000,
-            **pvalues)
-
-m = Energy('rbm'+str(rbmid), sys.argv)
-
-m.add(RBM(1000, w_std=0.1, b_wd=0))
-
-sgd = SGD(lr=0.1, decay=0.0002, momentum=0.8)
-topo = Cluster(workspace)
-m.compile(optimizer=sgd, cluster=topo)
-m.fit(X_train, alg='cd', nb_epoch=6000)
-#result = m.evaluate(X_test, test_steps=100, test_freq=500)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_rbm2.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_rbm2.py b/tool/python/examples/mnist_rbm2.py
deleted file mode 100755
index 1544f14..0000000
--- a/tool/python/examples/mnist_rbm2.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-rbmid = 2
-pvalues = {'batchsize' : 100, 'shape' : 784, 'std_value' : 255}
-X_train, X_test, workspace = mnist.load_data(
-            workspace = 'examples/rbm/rbm2',
-            nb_rbm = rbmid,
-            checkpoint_steps = 6000,
-            **pvalues)
-
-m = Energy('rbm'+str(rbmid), sys.argv)
-
-out_dim = [1000, 500]
-m.add(RBM(out_dim, w_std=0.1, b_wd=0))
-
-sgd = SGD(lr=0.1, decay=0.0002, momentum=0.8)
-topo = Cluster(workspace)
-m.compile(optimizer=sgd, cluster=topo)
-m.fit(X_train, alg='cd', nb_epoch=6000)
-#result = m.evaluate(X_test, test_steps=100, test_freq=500)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_rbm3.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_rbm3.py b/tool/python/examples/mnist_rbm3.py
deleted file mode 100755
index 3a6348d..0000000
--- a/tool/python/examples/mnist_rbm3.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-rbmid = 3
-pvalues = {'batchsize' : 100, 'shape' : 784, 'std_value' : 255}
-X_train, X_test, workspace = mnist.load_data(
-            workspace = 'examples/rbm/rbm3',
-            nb_rbm = rbmid,
-            checkpoint_steps = 6000,
-            **pvalues)
-
-m = Energy('rbm'+str(rbmid), sys.argv)
-
-out_dim = [1000, 500, 250]
-m.add(RBM(out_dim, w_std=0.1, b_wd=0))
-
-sgd = SGD(lr=0.1, decay=0.0002, momentum=0.8)
-topo = Cluster(workspace)
-m.compile(optimizer=sgd, cluster=topo)
-m.fit(X_train, alg='cd', nb_epoch=6000)
-#result = m.evaluate(X_test, test_steps=100, test_freq=500)
-

[19/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index ed651e9..e2572d3 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -16,23 +16,63 @@
 # under the License.
 # =============================================================================
 """
-This script includes Tensor class and its methods for python users
-to call singa::Tensor and its methods
+Example usage::
+
+    from singa import tensor
+    from singa import device
+
+    # create a tensor with shape (2,3), default CppCPU device and float32
+    x = tensor.Tensor((2,3))
+    x.set_value(0.4)
+
+    # create a tensor from a numpy array
+    y = tensor.from_numpy((3,3), dtype=np.float32)
+    y.uniform(-1, 1)
+
+    z = mult(x, y)  # gemm -> z of shape (2, 3)
+
+    x += z # element-wise addition
+
+    dev = device.create_cuda_gpu()
+    x.to_device(dev)  # move the data to a gpu device
+
+    r = relu(x)
+
+    r.to_host()  # move the data back to host cpu
+    s = r.to_numpy()  # tensor -> numpy array, r must be on cpu
+
+
+There are two set of tensor functions,
+
+Tensor member functions
+    which would change the internal state of the Tensor instance.
+Tensor module functions
+    which accept Tensor instances as arguments and return Tensor instances.
+
+Every Tesor instance must be initialized before reading data from it.
 """
 
 import numpy as np
+from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-from functools import reduce
+import device
 
 
 class Tensor(object):
-    ''' Class and member functions for singa::Tensor
+    '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor
+
+    The three arguments are three attributes of the Tensor.
+
+    Args:
+        shape (list<int>): a list of integers for the tensor shape. If shape is
+            not specified, the created tensor is called a dummy tensor.
+        device: a swig converted Device instance using the device moduel . If it
+            is None, then the default host device would be used.
+        dtype: data type. currently, most operations only accept kFloat32.
     '''
 
     def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
-        ''' shape = (tuple)
-        '''
         if shape is None:
             # call constructor of singa::Tensor
             self.singa_tensor = singa.Tensor()
@@ -48,125 +88,230 @@ class Tensor(object):
             self.device = device
             self.dtype = dtype
 
-    def copy_from_numpy(self, np_array, offset=0):
-        ''' this method stores the values of numpy array into tensor data
-            from the position of offset
-        '''
-        assert np_array.size == self.size(), 'tensor shape should be the same'
-        if not np_array.ndim == 1:
-            np_array = np_array.flatten()
-        dt = np_array.dtype
-        if dt == np.float32:
-            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
-        elif dt == np.int or dt == np.int32:
-            self.singa_tensor.intCopyDataFromHostPtr(np_array)
-        else:
-            print 'Not implemented yet for ', dt
-
-    # deprecated, access the member data_type directly
-    def data_type(self):
-        return self.singa_tensor.data_type()
-
-    # deprecated, access the member shape directly
-    def shape(self, axis=None):
-        if axis is None:
-            return self.singa_tensor.shape()
-        else:
-            return self.singa_tensor.shape(axis)
-
     def ndim(self):
+        '''
+        Returns:
+            the number of dimensions of the tensor.
+        '''
         return self.singa_tensor.nDim()
 
-    def is_transpose(self):  # TODO(wangwei) make transpose a member
+    def is_transpose(self):
+        '''
+        Returns:
+            True if the internal data is transposed; otherwise False.
+        '''
         return self.singa_tensor.transpose()
 
     def size(self):  # TODO(wangwei) compute size
+        '''
+        Returns:
+            the number of elements of the tensor.
+        '''
         return self.singa_tensor.Size()
 
     def memsize(self):
+        '''
+        Returns:
+            the number of Bytes allocated for this tensor.
+        '''
         return self.singa_tensor.MemSize()
 
     def reshape(self, shape):
+        '''Change the tensor shape.
+
+        Args:
+            shape (list<int>): new shape, which should have the same volumn as
+                the original shape.
+        '''
         assert product(self.shape) == product(shape), \
-               'product of shape should be equal'
+            'product of shape should be equal'
         self.shape = shape
-        self.singa_tensor.Reshape(_tuple_to_vector(shape))
+        self.singa_tensor.Reshape(list(shape))
 
     def reset_like(self, t):
+        '''Reset the shape, dtype and device as the given tensor.
+
+        Args:
+            t (Tensor)
+        '''
         self.singa_tensor.ResetLike(t.singa_tensor)
 
+    '''
     def as_type(self, dtype):
+        Change the data type.
+
+        Args:
+            dtype:
         self.singa_tensor.AsType(dtype)
+    '''
 
     def to_device(self, device):
+        '''Move the tensor data onto a given device.
+
+        Args:
+            device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
+        '''
         self.singa_tensor.ToDevice(device)
 
     def to_host(self):
+        '''Move the tensor data onto the default host CppCPU device.
+        '''
         self.singa_tensor.ToHost()
 
     def l2(self):
+        '''
+        Returns:
+            the L2 norm.
+        '''
         return self.singa_tensor.L2()
 
     def l1(self):
+        '''
+        Returns:
+            the L1 norm.
+        '''
         return self.singa_tensor.L1()
 
     def set_value(self, x):
+        '''Set all elements of the tensor to be the give value.
+
+        Args:
+            x (float), a float value to be set to all elements.
+        '''
         # assert type(x) == float, 'set value only accepts float input'
         # if isinstance(x, float):
         self.singa_tensor.floatSetValue(x)
 
+    def copy_from_numpy(self, np_array, offset=0):
+        ''' Copy the data from the numpy array.
+
+        Args:
+            np_array: source numpy array
+            offset (int): destination offset
+        '''
+        assert np_array.size == self.size(), 'tensor shape should be the same'
+        if not np_array.ndim == 1:
+            np_array = np_array.flatten()
+        dt = np_array.dtype
+        if dt == np.float32:
+            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
+        elif dt == np.int or dt == np.int32:
+            self.singa_tensor.intCopyDataFromHostPtr(np_array)
+        else:
+            print 'Not implemented yet for ', dt
+
     def copy_data(self, t):
+        '''Copy data from other Tensor instance.
+
+        Args:
+            t (Tensor): source Tensor.
+        '''
+        assert type(t) == Tensor, 't must be a singa Tensor instance'
         self.singa_tensor.CopyData(t.singa_tensor)
 
     def clone(self):
-        ''' it does deep copy
-            call singa::Tensor::Clone()
+        '''
+        Returns:
+            a new Tensor which does deep copy of this tensor
         '''
         return _call_singa_func(self.singa_tensor.Clone)
 
-    def transpose(self):
-        ''' shallow copy, negate the transpose field
-            call singa::Tensor::T()
+    def T(self):
+        ''' shallow copy, negate the transpose field.
+
+        Returns:
+            a new Tensor which shares the underlying data memory (shallow copy)
+            but is marked as a transposed version of this tensor.
         '''
         return _call_singa_func(self.singa_tensor.T)
 
+    '''
     def copy(self):
-        ''' shallow copy
+        shallow copy
             call copy constructor of singa::Tensor
-        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
+    '''
 
     def deepcopy(self):
-        ''' deep copy
-            call singa::Tensor::Clone()
+        '''Same as clone().
+
+        Returns:
+            a new Tensor
         '''
         return self.clone()
 
     def bernoulli(self, p):
+        '''Sample 0/1 for each element according to the given probability.
+
+        Args:
+            p (float): with probability p, each element is sample to 1.
+        '''
         singa.floatBernoulli(float(p), self.singa_tensor)
 
     def gaussian(self, mean, std):
+        '''Generate a value for each element following a Gaussian distribution.
+
+        Args:
+            mean (float): mean of the distribution
+            std (float): standard variance of the distribution
+        '''
         singa.floatGaussian(float(mean), float(std), self.singa_tensor)
 
     def uniform(self, low, high):
+        '''Generate a value for each element following a uniform distribution.
+
+        Args:
+            low (float): the lower bound
+            high (float): the hight bound
+        '''
         singa.floatUniform(float(low), float(high), self.singa_tensor)
 
     def add_column(self, v):
+        '''Add a tensor to each column of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a column to this tensor.
+        '''
         singa.AddColumn(v.singa_tensor, self.singa_tensor)
 
     def add_row(self, v):
+        '''Add a tensor to each row of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a row to this tensor.
+        '''
         singa.AddRow(v.singa_tensor, self.singa_tensor)
 
     def div_column(self, v):
+        '''Divide each column of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the column of self.
+        '''
         singa.DivColumn(v.singa_tensor, self.singa_tensor)
 
     def div_row(self, v):
+        '''Divide each row of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the row of self.
+        '''
         singa.DivRow(v.singa_tensor, self.singa_tensor)
 
     def mult_column(self, v):
+        '''Multiply each column of this tensor by v element-wisely.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the column of self.
+        '''
         singa.MultColumn(v.singa_tensor, self.singa_tensor)
 
     def mult_row(self, v):
+        '''Multiply each row of this tensor by v element-wisely.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the row of self.
+        '''
         singa.MultRow(v.singa_tensor, self.singa_tensor)
 
     '''
@@ -174,6 +319,11 @@ class Tensor(object):
     '''
 
     def __iadd__(self, x):
+        ''' inplace element-wise addition with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor += x.singa_tensor
         else:
@@ -181,6 +331,12 @@ class Tensor(object):
         return self
 
     def __isub__(self, x):
+        ''' inplace element-wise subtraction with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
+
         if isinstance(x, Tensor):
             self.singa_tensor -= x.singa_tensor
         else:
@@ -188,6 +344,11 @@ class Tensor(object):
         return self
 
     def __imul__(self, x):
+        ''' inplace element-wise multiplication with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor *= x.singa_tensor
         else:
@@ -195,6 +356,11 @@ class Tensor(object):
         return self
 
     def __idiv__(self, x):
+        ''' inplace element-wise division by a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
@@ -284,29 +450,72 @@ def product(shape):
 
 
 def sizeof(dtype):
+    '''
+    Returns:
+        the number of bytes of the given SINGA data type defined in core.proto
+    '''
     return singa.SizeOf(dtype)
 
 
 def reshape(t, s):
+    '''Reshape the input tensor with the given shape.
+
+    Args:
+        t (Tensor): the tensor to be changed
+        s (list<int>): the new shape, which should have the same volumn as the
+            old shape.
+
+    Returns:
+        the new Tensor
+    '''
     return _call_singa_func(singa.Reshape, t.singa_tensor, s)
 
 
 def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
+    '''Copy the data between two Tensor instances which could be on different
+    devices.
+
+    Args:
+        dst (Tensor): destination Tensor
+        src (Tensor): source Tensor
+        size (int) : number of elements to copy
+        dst_offset (int): offset in terms of elements to the start of dst
+        src_offset (int): offset in terms of elements to the start of src
+    '''
     singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size,
                          dst_offset, src_offset)
 
 
 def from_numpy(np_array):
+    '''Create a Tensor instance with the shape, dtype and values from the numpy
+    array.
+
+    Args:
+        np_array: the numpy array.
+
+    Returns:
+        A Tensor instance allocated on the default CppCPU device.
+    '''
     ret = Tensor(np_array.shape)
     ret.copy_from_numpy(np_array)
     return ret
 
 
 def to_numpy(t):
-    ''' this method gets the values of tensor data and
-        returns it as numpy array
-        TODO(wangwei) clone t to host
+    '''Convert the tensor into a numpy array.
+
+    Since numpy array is allocated on CPU devices, the input Tensor instance
+    must be on the default CppCPU device.
+
+    Args:
+        t (Tensor), a Tensor on the default CppCPU device.
+
+    Returns:
+        a numpy array
     '''
+    assert t.device == device.get_default_device() or t.device is None, \
+        'Please move the tensor onto the default host device'
+
     if t.dtype == core_pb2.kFloat32:
         np_array = t.singa_tensor.floatGetValue(int(t.size()))
     elif t.dtype == core_pb2.kInt:
@@ -317,34 +526,96 @@ def to_numpy(t):
 
 
 def abs(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = abs(x), x is an element of t
+    '''
     return _call_singa_func(singa.Abs, t.singa_tensor)
 
 
 def exp(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = exp(x), x is an element of t
+    '''
     return _call_singa_func(singa.Exp, t.singa_tensor)
 
 
 def log(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = log(x), x is an element of t
+    '''
     return _call_singa_func(singa.Log, t.singa_tensor)
 
 
 def relu(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x if x >0; otherwise 0; x is an element
+        of t
+    '''
     return _call_singa_func(singa.ReLU, t.singa_tensor)
 
 
 def sigmoid(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = sigmoid(x); x is an element of t
+    '''
     return _call_singa_func(singa.Sigmoid, t.singa_tensor)
 
 
 def square(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x * x, x is an element of t
+    '''
     return _call_singa_func(singa.Square, t.singa_tensor)
 
 
 def tanh(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = tanh(x), x is an element of t
+    '''
     return _call_singa_func(singa.Tanh, t.singa_tensor)
 
 
 def sum(t, axis=None):
+    '''Sum elements of the input tensor long the given axis.
+
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, the summation is done over all elements;
+            if axis is provided, then it is calculated along the given axis,
+            e.g. 0 -- sum each column; 1 -- sum each row.
+
+    Returns:
+        a float value as the sum of all elements, or a new Tensor
+    '''
+
     if axis is None:
         return singa.floatSum(t.singa_tensor)
     else:
@@ -352,6 +623,17 @@ def sum(t, axis=None):
 
 
 def pow(t, x, out=None):
+    '''
+    Args:
+        t (Tensor): input tensor
+        x (float or Tensor): y[i] = t[i]^x if x is a float value; otherwise,
+            y[i]= t[i]^x[i] if x is a tensor.
+        out (None or Tensor): if None, a new Tensor would be constructed to
+            store the result; otherwise, the result is put into out.
+
+    Returns:
+        the result tensor.
+    '''
     if out is None:
         if isinstance(x, Tensor):
             return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor)
@@ -365,7 +647,17 @@ def pow(t, x, out=None):
         return out
 
 
-def average(t, axis=0):
+def average(t, axis=None):
+    '''
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, average all elements; otherwise average
+            along the given dimension. 0 for averaging each column; 1 for
+            averaging each row.
+
+    Returns:
+        a float value if axis is None; otherwise, a new Tensor for the result.
+    '''
     if t.ndim() > 1:
         return _call_singa_func(singa.Average, t.singa_tensor, axis)
     else:
@@ -373,6 +665,15 @@ def average(t, axis=0):
 
 
 def softmax(t, out=None):
+    '''Apply SoftMax for each row of the Tensor.
+
+    Args:
+        t (Tensor): the input 1d or 2d tensor
+        out (Tensor, optional): if not None, it is used to store the result
+
+    Returns:
+        the result Tensor
+    '''
     if out is None:
         return _call_singa_func(singa.SoftMax, t.singa_tensor)
     else:
@@ -381,22 +682,73 @@ def softmax(t, out=None):
 
 
 def lt(t, x):
+    '''Elementi-wise comparison for t < x
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] < x ? 1.0f:0.0f,
+        or t[i] < x[i] ? 1.0f:0.0f
+    '''
     return t < x
 
 
 def le(t, x):
+    '''Elementi-wise comparison for t <= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] <= x ? 1.0f:0.0f,
+        or t[i] <= x[i] ? 1.0f:0.0f
+    '''
     return t <= x
 
 
 def gt(t, x):
+    '''Elementi-wise comparison for t > x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] > x ? 1.0f:0.0f,
+        or t[i] > x[i] ? 1.0f:0.0f
+    '''
     return t > x
 
 
 def ge(t, x):
+    '''Elementi-wise comparison for t >= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] >= x ? 1.0f:0.0f,
+        or t[i] >= x[i] ? 1.0f:0.0f
+    '''
     return t >= x
 
 
 def add(lhs, rhs, ret=None):
+    '''Elementi-wise addition.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__add__()
         return lhs + rhs
@@ -409,6 +761,17 @@ def add(lhs, rhs, ret=None):
 
 
 def sub(lhs, rhs, ret=None):
+    '''Elementi-wise subtraction.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__sub__()
         return lhs - rhs
@@ -421,6 +784,18 @@ def sub(lhs, rhs, ret=None):
 
 
 def eltwise_mult(lhs, rhs, ret=None):
+    '''Elementi-wise multiplication.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
+
     if ret is None:
         # call Tensor.__mul__()
         return lhs * rhs
@@ -435,8 +810,21 @@ def eltwise_mult(lhs, rhs, ret=None):
 
 
 def mult(A, B, C=None, alpha=1.0, beta=0.0):
-    '''
+    '''Do matrix-matrix or matrix-vector multiplication.
+
     This function returns C = alpha * A * B + beta * C
+
+    Args:
+        A (Tensor): 2d Tensor
+        B (Tensor): If B is a 1d Tensor, GEMV would be invoked for matrix-vector
+            multiplication; otherwise GEMM would be invoked.
+        C (Tensor, optional): for storing the result; If None, a new Tensor
+            would be created.
+        alpha (float)
+        beta (float)
+
+    Returns:
+        the result Tensor
     '''
     if C is None:
         return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor)
@@ -447,6 +835,17 @@ def mult(A, B, C=None, alpha=1.0, beta=0.0):
 
 
 def div(lhs, rhs, ret=None):
+    '''Elementi-wise division.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__div__()
         return lhs / rhs
@@ -459,51 +858,125 @@ def div(lhs, rhs, ret=None):
 
 
 def axpy(alpha, x, y):
-    if isinstance(alpha, float):
-        singa.floatAxpy(alpha, x.singa_tensor, y.singa_tensor)
+    '''Element-wise operation for y += alpha * x.
+
+    Args:
+        alpha (float)
+        x (Tensor)
+        y (Tensor)
+
+    Returns:
+        y
+    '''
+    singa.floatAxpy(float(alpha), x.singa_tensor, y.singa_tensor)
     return y
 
 
 def bernoulli(p, t):
-    if isinstance(p, float):
-        singa.floatBernoulli(p, t.singa_tensor)
+    '''Generate a binary value for each element of t.
+
+    Args:
+        p (float): each element is 1 with probability p; and 0 with 1 - p
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatBernoulli(float(p), t.singa_tensor)
     return t
 
 
 def gaussian(mean, std, t):
-    if isinstance(mean, float):
-        singa.floatGaussian(mean, std, t.singa_tensor)
+    '''Generate values following a Gaussian distribution.
+
+    Args:
+        mean (float): the mean of the Gaussian distribution.
+        std (float): the standard variance of the Gaussian distribution.
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatGaussian(float(mean), float(std), t.singa_tensor)
     return t
 
 
 def uniform(low, high, t):
-    if isinstance(low, float):
-        singa.floatUniform(low, high, t.singa_tensor)
+    '''Generate values following a Uniform distribution.
+
+    Args:
+        low (float): the lower bound
+        hight (float): the higher bound
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatUniform(float(low), float(high), t.singa_tensor)
     return t
 
 
 def add_column(alpha, v, beta, M):
-    singa.floatAddColumn(alpha, beta, v.singa_tensor, M.singa_tensor)
+    '''Add v to each column of M.
+
+    Denote each column of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
+    singa.floatAddColumn(float(alpha), float(beta), v.singa_tensor,
+                         M.singa_tensor)
     return M
 
 
 def add_row(alpha, v, beta, M):
+    '''Add v to each row of M.
+
+    Denote each row of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
     singa.floatAddRow(alpha, beta, v.singa_tensor, M.singa_tensor)
     return M
 
 
 def sum_columns(M):
+    '''Sum all columns into a single column.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulted column.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_col = M.shape(0)
-    ret = Tensor((nb_col, 1))
+    ret = Tensor((M.shape[0], 1))
     singa.SumColumns(M.singa_tensor, ret.singa_tensor)
     return ret
 
 
 def sum_rows(M):
+    '''Sum all rows into a single row.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulted row.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_row = M.shape(1)
-    ret = Tensor((1, nb_row))
+    ret = Tensor((1, M.shape[1]))
     singa.SumRows(M.singa_tensor, ret.singa_tensor)
     return ret
 
@@ -512,15 +985,6 @@ def sum_rows(M):
 '''
 
 
-def _tuple_to_vector(tshape):
-    ''' this function converts tuple to std::vector<int>
-    '''
-    vs = singa.Shape(len(tshape))
-    for i in range(len(tshape)):
-        vs[i] = tshape[i]
-    return vs
-
-
 def _call_singa_func(_singa_func, *args):
     ''' this function calls singa global functions that returns Tensor
         and create new python Tensor instance
@@ -528,7 +992,7 @@ def _call_singa_func(_singa_func, *args):
     '''
     new_t = Tensor()
     new_t.singa_tensor = _singa_func(*args)
-    new_t.shape = new_t.singa_tensor.shape()
+    new_t.shape = tuple(new_t.singa_tensor.shape())
     new_t.device = new_t.singa_tensor.device()
     new_t.dtype = new_t.singa_tensor.data_type()
     return new_t

[47/51] [abbrv] incubator-singa git commit: Preparing for V1.0 RC0.

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/utils/utility.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/utils/utility.py b/tool/python/singa/utils/utility.py
deleted file mode 100644
index b88720c..0000000
--- a/tool/python/singa/utils/utility.py
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-This script includes methods to
-(1) generate name of layer, parameter, etc.
-(2) set field values for proto.
-(3) swap bits
-'''
-
-LAYERID = 0
-PARAMID = 0
-
-def generate_name(label, option=0, withnumber=True):
-    ''' This method returns name of layer or parameter with unique id.
-        option: 1 to increase id number
-        withnumber: True to concatenate number to name
-    '''
-
-    global LAYERID, PARAMID
-    num = LAYERID
-    if label == 'layer':
-        if option == 1: LAYERID += 1
-        num = LAYERID
-    elif label == 'param':
-        if option == 1: PARAMID += 1
-        num = PARAMID
-    else:
-        if option == 1: LAYERID += 1
-        num = LAYERID
-        if option == 2:
-            num = LAYERID+1
-
-    if withnumber == False:
-        return '{0}'.format(label)
-
-    return '{0}{1}'.format(label, num)
-
-def setval(proto, **kwargs):
-    ''' This method sets field values for give proto.
-    '''
-
-    for key, val in kwargs.items():
-        #print 'kv: ', k, ', ', v
-        if hasattr(proto, key):
-            flabel = proto.DESCRIPTOR.fields_by_name[key].label
-            ftype = proto.DESCRIPTOR.fields_by_name[key].type
-
-            fattr = getattr(proto, key)
-            if flabel == 3: # repeated field
-                if ftype == 11: # message type
-                    fattr = fattr.add()
-                    fattr.MergeFrom(val)
-                else:
-                    if type(val) == list or type(val) == tuple:
-                        for i in range(len(val)):
-                            fattr.append(val[i])
-                    else:
-                        fattr.append(val)
-            else:
-                if ftype == 11: # message type
-                    fattr = getattr(proto, key)
-                    fattr.MergeFrom(val)
-                else:
-                    setattr(proto, key, val)
-

[36/51] [abbrv] incubator-singa git commit: Minor updates to pass tests and run examples

Posted by wa...@apache.org.

Minor updates to pass tests and run examples


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/0a764257
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/0a764257
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/0a764257

Branch: refs/heads/master
Commit: 0a7642576cb0df87c6f08ff00227658c0e03f69f
Parents: 72d736a
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 21:40:55 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Tue Aug 16 00:12:27 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                           |  7 +-
 doc/en/conf.py                         |  2 +-
 doc/en/docs.rst                        |  1 -
 doc/en/docs/device.rst                 |  4 +-
 doc/en/docs/index.rst                  |  9 ++-
 doc/en/docs/initializer.rst            | 12 ++++
 doc/en/docs/layer.rst                  | 14 ++++
 doc/en/docs/loss.rst                   |  7 ++
 doc/en/docs/metric.rst                 |  8 +++
 doc/en/docs/optimizer.rst              | 11 ++++
 doc/en/docs/overview.rst               | 99 -----------------------------
 doc/en/docs/tensor.rst                 | 30 +--------
 doc/en/docs/utils.rst                  |  6 ++
 doc/en/downloads.md                    |  1 -
 doc/en/index.rst                       |  9 +--
 doc/en/releases/RELEASE_NOTES_0.1.0.md | 99 +++++++++++++++++++++++++++++
 doc/en/releases/RELEASE_NOTES_0.2.0.md | 84 ++++++++++++++++++++++++
 doc/en/releases/RELEASE_NOTES_0.3.0.md | 37 +++++++++++
 doc/zh/conf.py                         |  2 +-
 examples/char-rnn/train.py             |  7 +-
 examples/cifar10/README.md             | 69 ++++++++++++++++++++
 examples/cifar10/alexnet.cc            |  2 +-
 examples/cifar10/train.py              |  2 +-
 examples/mnist/train.py                |  1 -
 src/python/singa/optimizer.py          |  2 +
 test/CMakeLists.txt                    |  2 +-
 26 files changed, 373 insertions(+), 154 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index b5282b7..f02595b 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -26,14 +26,9 @@ clean:
 
 .PHONY: html
 html:
-<<<<<<< HEAD
-	cp -rf ../examples docs/
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
-=======
 	cp -rf ../examples en/docs/
-	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html
+	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html/en
 	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
->>>>>>> v1doc
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/conf.py
----------------------------------------------------------------------
diff --git a/doc/en/conf.py b/doc/en/conf.py
index 36080d9..46a48f6 100755
--- a/doc/en/conf.py
+++ b/doc/en/conf.py
@@ -19,7 +19,7 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../build/python'))
+sys.path.insert(1, os.path.abspath('../../build/python'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs.rst b/doc/en/docs.rst
index 400b12a..c1b143b 100644
--- a/doc/en/docs.rst
+++ b/doc/en/docs.rst
@@ -3,4 +3,3 @@ Documentation
 
 .. toctree::
    docs/index
-   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/device.rst b/doc/en/docs/device.rst
index e79d87a..53faf48 100644
--- a/doc/en/docs/device.rst
+++ b/doc/en/docs/device.rst
@@ -23,9 +23,7 @@ Python API
    :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
 
 
-The following code provides examples of creating devices,
-
-.. code:: python
+The following code provides examples of creating devices::
 
    from singa import device
    cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/index.rst b/doc/en/docs/index.rst
index 93315de..a2ea540 100644
--- a/doc/en/docs/index.rst
+++ b/doc/en/docs/index.rst
@@ -1,5 +1,5 @@
-English
-=======
+Documentation
+=============
 
 .. toctree::
 
@@ -7,4 +7,9 @@ English
    software_stack
    device
    tensor
+   layer
+   initializer
+   loss
+   metric
+   optimizer
    examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/initializer.rst b/doc/en/docs/initializer.rst
new file mode 100644
index 0000000..f334497
--- /dev/null
+++ b/doc/en/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+   :members: uniform, gaussian
+   :member-order: bysource
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/layer.rst b/doc/en/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/en/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/loss.rst b/doc/en/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/en/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+   :members:
+   :show-inheritance:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/metric.rst b/doc/en/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/en/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+   :members:
+   :show-inheritance:
+   :member-order: bysource

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/optimizer.rst b/doc/en/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/en/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/overview.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/overview.rst b/doc/en/docs/overview.rst
deleted file mode 100644
index 18ad62b..0000000
--- a/doc/en/docs/overview.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-Introduction
-==============
-
-
-SINGA is a general distributed deep learning platform for training big deep
-learning models over large datasets. It is designed with an intuitive
-programming model based on the layer abstraction. A variety
-of popular deep learning models are supported, namely feed-forward models including
-convolutional neural networks (CNN), energy models like restricted Boltzmann
-machine (RBM), and recurrent neural networks (RNN). Many built-in layers are
-provided for users. SINGA architecture is
-sufficiently flexible to run synchronous, asynchronous and hybrid training
-frameworks.  SINGA
-also supports different neural net partitioning schemes to parallelize the
-training of large models, namely partitioning on batch dimension, feature
-dimension or hybrid partitioning.
-
-
-Goals
------
-
-As a distributed system, the first goal of SINGA is to have good scalability. In other
-words, SINGA is expected to reduce the total training time to achieve certain
-accuracy with more computing resources (i.e., machines).
-
-
-The second goal is to make SINGA easy to use.
-It is non-trivial for programmers to develop and train models with deep and
-complex model structures.  Distributed training further increases the burden of
-programmers, e.g., data and model partitioning, and network communication.  Hence it is essential to
-provide an easy to use programming model so that users can implement their deep
-learning models/algorithms without much awareness of the underlying distributed
-platform.
-
-Principles
-----------
-
-Scalability is a challenging research problem for distributed deep learning
-training. SINGA provides a general architecture to exploit the scalability of
-different training frameworks. Synchronous training frameworks improve the
-efficiency of one training iteration, and
-asynchronous training frameworks improve the convergence rate. Given a fixed budget
-(e.g., cluster size), users can run a hybrid framework that maximizes the
-scalability by trading off between efficiency and convergence rate.
-
-SINGA comes with a programming model designed based on the layer abstraction, which
-is intuitive for deep learning models.  A variety of
-popular deep learning models can be expressed and trained using this programming model.
-
-System overview
----------------
-
-.. figure:: /image/sgd.png
-
-            Figure 1 - SGD flow
-
-Training a deep learning model is to find the optimal parameters involved in
-the transformation functions that generate good features for specific tasks.
-The goodness of a set of parameters is measured by a loss function, e.g.,
-`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the
-loss functions are usually non-linear and non-convex, it is difficult to get a
-closed form solution. Typically, people use the stochastic gradient descent
-(SGD) algorithm, which randomly
-initializes the parameters and then iteratively updates them to reduce the loss
-as shown in Figure 1.
-
-.. figure:: /image/overview.png
-
-           Figure 2 - SINGA overview
-
-SGD is used in SINGA to train
-parameters of deep learning models. The training workload is distributed over
-worker and server units as shown in Figure 2. In each
-iteration, every worker calls *TrainOneBatch* function to compute
-parameter gradients. *TrainOneBatch* takes a *NeuralNet* object
-representing the neural net, and visits layers of the *NeuralNet* in
-certain order. The resultant gradients are sent to the local stub that
-aggregates the requests and forwards them to corresponding servers for
-updating. Servers reply to workers with the updated parameters for the next
-iteration.
-
-
-Job submission
---------------
-
-To submit a job in SINGA (i.e., training a deep learning model),
-users pass the job configuration to SINGA driver in the
-`main function <programming-guide.html>`_ . The job configuration
-specifies the four major components in Figure 2,
-
-  * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections;
-  * a `TrainOneBatch <train-one-batch.html>`_  algorithm which is tailored for different model categories;
-  * an `Updater <updater.html>`_  defining the protocol for updating parameters at the server side;
-  * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers.
-
-This process is like the job submission in Hadoop, where users configure their
-jobs in the main function to set the mapper, reducer, etc.
-In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users
-can configure their jobs with their own (or built-in) layer, updater, etc.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/tensor.rst b/doc/en/docs/tensor.rst
index 87d26ea..ff6142e 100644
--- a/doc/en/docs/tensor.rst
+++ b/doc/en/docs/tensor.rst
@@ -21,34 +21,10 @@ type of Device.
 Python API
 ----------
 
-There are two set of tensor functions,
-1. Tensor member functions, which would change the internal state of the Tensor instance.
-2. tensor module functions, which accepts Tensor instances as arguments and return
-Tensor instances.
-
-
-Create Tensor instances
-~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autoclass:: singa.tensor.Tensor
-
-
-Tensor instances can be constructed from Numpy array,
 
 .. automodule:: singa.tensor
-   :members: from_numpy
-
-
-Set Tensor values
-~~~~~~~~~~~~~~~~~
-
-
-
-
-
-
-
-
-
+   :members:
 
 
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/utils.rst b/doc/en/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/en/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+   :members:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/downloads.md
----------------------------------------------------------------------
diff --git a/doc/en/downloads.md b/doc/en/downloads.md
index 31e7274..fe0c30a 100644
--- a/doc/en/downloads.md
+++ b/doc/en/downloads.md
@@ -1,5 +1,4 @@
 ## Download SINGA
----
 
 * Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/index.rst b/doc/en/index.rst
index 50c65d7..1bbbe9a 100755
--- a/doc/en/index.rst
+++ b/doc/en/index.rst
@@ -2,7 +2,6 @@
    sphinx-quickstart on Sat Jul  9 20:36:57 2016.
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
-
 Welcome to Apache Singa
 =======================
 
@@ -35,19 +34,17 @@ Recent News
 
 Getting Started
 ---------------
-* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
+* The `Software stack <docs/software_stack.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
 
-* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
+* Please follow the `Examples <docs/examples/index.html>`_ guide to run simple applications on SINGA.
 
 Documentation
 -------------
 
 * Documentations are listed `here <docs.html>`_.
 
-* Code API can be found `here <api/index.html>`_.
-
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
@@ -74,7 +71,7 @@ Please cite the following two papers if you use SINGA in your research:
    :hidden:
 
    downloads
-   docs
+   docs/index
 
 .. toctree::
    :hidden:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.1.0.md
----------------------------------------------------------------------
diff --git a/doc/en/releases/RELEASE_NOTES_0.1.0.md b/doc/en/releases/RELEASE_NOTES_0.1.0.md
new file mode 100644
index 0000000..2674d90
--- /dev/null
+++ b/doc/en/releases/RELEASE_NOTES_0.1.0.md
@@ -0,0 +1,99 @@
+#singa-incubating-0.1.0 Release Notes
+
+---
+
+SINGA is a general distributed deep learning platform for training big deep learning models over large datasets. It is
+designed with an intuitive programming model based on the layer abstraction. SINGA supports a wide variety of popular
+deep learning models.
+
+This release includes following features:
+
+  * Job management
+    * [SINGA-3](https://issues.apache.org/jira/browse/SINGA-3)  Use Zookeeper to check stopping (finish) time of the system
+    * [SINGA-16](https://issues.apache.org/jira/browse/SINGA-16)  Runtime Process id Management
+    * [SINGA-25](https://issues.apache.org/jira/browse/SINGA-25)  Setup glog output path
+    * [SINGA-26](https://issues.apache.org/jira/browse/SINGA-26)  Run distributed training in a single command
+    * [SINGA-30](https://issues.apache.org/jira/browse/SINGA-30)  Enhance easy-to-use feature and support concurrent jobs
+    * [SINGA-33](https://issues.apache.org/jira/browse/SINGA-33)  Automatically launch a number of processes in the cluster
+    * [SINGA-34](https://issues.apache.org/jira/browse/SINGA-34)  Support external zookeeper service
+    * [SINGA-38](https://issues.apache.org/jira/browse/SINGA-38)  Support concurrent jobs
+    * [SINGA-39](https://issues.apache.org/jira/browse/SINGA-39)  Avoid ssh in scripts for single node environment
+    * [SINGA-43](https://issues.apache.org/jira/browse/SINGA-43)  Remove Job-related output from workspace
+    * [SINGA-56](https://issues.apache.org/jira/browse/SINGA-56)  No automatic launching of zookeeper service
+    * [SINGA-73](https://issues.apache.org/jira/browse/SINGA-73)  Refine the selection of available hosts from host list
+
+
+  * Installation with GNU Auto tool
+    * [SINGA-4](https://issues.apache.org/jira/browse/SINGA-4)  Refine thirdparty-dependency installation
+    * [SINGA-13](https://issues.apache.org/jira/browse/SINGA-13)  Separate intermediate files of compilation from source files
+    * [SINGA-17](https://issues.apache.org/jira/browse/SINGA-17)  Add root permission within thirdparty/install.
+    * [SINGA-27](https://issues.apache.org/jira/browse/SINGA-27)  Generate python modules for proto objects
+    * [SINGA-53](https://issues.apache.org/jira/browse/SINGA-53)  Add lmdb compiling options
+    * [SINGA-62](https://issues.apache.org/jira/browse/SINGA-62)  Remove building scrips and auxiliary files
+    * [SINGA-67](https://issues.apache.org/jira/browse/SINGA-67)  Add singatest into build targets
+
+
+  * Distributed training
+    * [SINGA-7](https://issues.apache.org/jira/browse/SINGA-7)  Implement shared memory Hogwild algorithm
+    * [SINGA-8](https://issues.apache.org/jira/browse/SINGA-8)  Implement distributed Hogwild
+    * [SINGA-19](https://issues.apache.org/jira/browse/SINGA-19)  Slice large Param objects for load-balance
+    * [SINGA-29](https://issues.apache.org/jira/browse/SINGA-29)  Update NeuralNet class to enable layer partition type customization
+    * [SINGA-24](https://issues.apache.org/jira/browse/SINGA-24)  Implement Downpour training framework
+    * [SINGA-32](https://issues.apache.org/jira/browse/SINGA-32)  Implement AllReduce training framework
+    * [SINGA-57](https://issues.apache.org/jira/browse/SINGA-57)  Improve Distributed Hogwild
+
+
+  * Training algorithms for different model categories
+    * [SINGA-9](https://issues.apache.org/jira/browse/SINGA-9)  Add Support for Restricted Boltzman Machine (RBM) model
+    * [SINGA-10](https://issues.apache.org/jira/browse/SINGA-10)  Add Support for Recurrent Neural Networks (RNN)
+
+
+  * Checkpoint and restore
+    * [SINGA-12](https://issues.apache.org/jira/browse/SINGA-12)  Support Checkpoint and Restore
+
+
+  * Unit test
+    * [SINGA-64](https://issues.apache.org/jira/browse/SINGA-64)  Add the test module for utils/common
+
+
+  * Programming model
+    * [SINGA-36](https://issues.apache.org/jira/browse/SINGA-36)  Refactor job configuration, driver program and scripts
+    * [SINGA-37](https://issues.apache.org/jira/browse/SINGA-37)  Enable users to set parameter sharing in model configuration
+    * [SINGA-54](https://issues.apache.org/jira/browse/SINGA-54)  Refactor job configuration to move fields in ModelProto out
+    * [SINGA-55](https://issues.apache.org/jira/browse/SINGA-55)  Refactor main.cc and singa.h
+    * [SINGA-61](https://issues.apache.org/jira/browse/SINGA-61)  Support user defined classes
+    * [SINGA-65](https://issues.apache.org/jira/browse/SINGA-65)  Add an example of writing user-defined layers
+
+
+  * Other features
+    * [SINGA-6](https://issues.apache.org/jira/browse/SINGA-6)  Implement thread-safe singleton
+    * [SINGA-18](https://issues.apache.org/jira/browse/SINGA-18)  Update API for displaying performance metric
+    * [SINGA-77](https://issues.apache.org/jira/browse/SINGA-77)  Integrate with Apache RAT
+
+
+Some bugs are fixed during the development of this release
+
+  * [SINGA-2](https://issues.apache.org/jira/browse/SINGA-2) Check failed: zsock_connect
+  * [SINGA-5](https://issues.apache.org/jira/browse/SINGA-5) Server early terminate when zookeeper singa folder is not initially empty
+  * [SINGA-15](https://issues.apache.org/jira/browse/SINGA-15) Fixg a bug from ConnectStub function which gets stuck for connecting layer_dealer_
+  * [SINGA-22](https://issues.apache.org/jira/browse/SINGA-22) Cannot find openblas library when it is installed in default path
+  * [SINGA-23](https://issues.apache.org/jira/browse/SINGA-23) Libtool version mismatch error.
+  * [SINGA-28](https://issues.apache.org/jira/browse/SINGA-28) Fix a bug from topology sort of Graph
+  * [SINGA-42](https://issues.apache.org/jira/browse/SINGA-42) Issue when loading checkpoints
+  * [SINGA-44](https://issues.apache.org/jira/browse/SINGA-44) A bug when reseting metric values
+  * [SINGA-46](https://issues.apache.org/jira/browse/SINGA-46) Fix a bug in updater.cc to scale the gradients
+  * [SINGA-47](https://issues.apache.org/jira/browse/SINGA-47) Fix a bug in data layers that leads to out-of-memory when group size is too large
+  * [SINGA-48](https://issues.apache.org/jira/browse/SINGA-48) Fix a bug in trainer.cc that assigns the same NeuralNet instance to workers from diff groups
+  * [SINGA-49](https://issues.apache.org/jira/browse/SINGA-49) Fix a bug in HandlePutMsg func that sets param fields to invalid values
+  * [SINGA-66](https://issues.apache.org/jira/browse/SINGA-66) Fix bugs in Worker::RunOneBatch function and ClusterProto
+  * [SINGA-79](https://issues.apache.org/jira/browse/SINGA-79) Fix bug in singatool that can not parse -conf flag
+
+
+Features planned for the next release
+
+  * [SINGA-11](https://issues.apache.org/jira/browse/SINGA-11) Start SINGA using Mesos
+  * [SINGA-31](https://issues.apache.org/jira/browse/SINGA-31) Extend Blob to support xpu (cpu or gpu)
+  * [SINGA-35](https://issues.apache.org/jira/browse/SINGA-35) Add random number generators
+  * [SINGA-40](https://issues.apache.org/jira/browse/SINGA-40) Support sparse Param update
+  * [SINGA-41](https://issues.apache.org/jira/browse/SINGA-41) Support single node single GPU training
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.2.0.md
----------------------------------------------------------------------
diff --git a/doc/en/releases/RELEASE_NOTES_0.2.0.md b/doc/en/releases/RELEASE_NOTES_0.2.0.md
new file mode 100644
index 0000000..38f498a
--- /dev/null
+++ b/doc/en/releases/RELEASE_NOTES_0.2.0.md
@@ -0,0 +1,84 @@
+#singa-incubating-0.2.0 Release Notes
+
+---
+
+SINGA is a general distributed deep learning platform for training big deep
+learning models over large datasets. It is designed with an intuitive
+programming model based on the layer abstraction. SINGA supports a wide variety
+of popular deep learning models.
+
+This release includes the following **major features**:
+
+* [Training on GPU](../docs/gpu.html) enables training of complex models on a single node with multiple GPU cards.
+* [Hybrid neural net partitioning](../docs/hybrid.html) supports data and model parallelism at the same time.
+* [Python wrapper](../docs/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
+* [RNN model and BPTT algorithm](../docs/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU.
+* [Cloud software integration](../docs/distributed-training.md) includes Mesos, Docker and HDFS.
+
+
+**More details** are listed as follows,
+
+  * Programming model
+    * [SINGA-80] New Blob Level and Address Level Math Operation Interface
+    * [SINGA-82] Refactor input layers using data store abstraction
+    * [SINGA-87] Replace exclude field to include field for layer configuration
+    * [SINGA-110] Add Layer member datavec_ and gradvec_
+    * [SINGA-120] Implemented GRU and BPTT (BPTTWorker)
+
+
+  * Neuralnet layers
+    * [SINGA-91] Add SoftmaxLayer and ArgSortLayer
+    * [SINGA-106] Add dummy layer for test purpose
+    * [SINGA-120] Implemented GRU and BPTT (GRULayer and OneHotLayer)
+
+
+  * GPU training support
+    * [SINGA-100] Implement layers using CUDNN for GPU training
+    * [SINGA-104] Add Context Class
+    * [SINGA-105] Update GUN make files for compiling cuda related code
+    * [SINGA-98] Add Support for AlexNet ImageNet Classification Model
+
+
+  * Model/Hybrid partition
+    * [SINGA-109] Refine bridge layers
+    * [SINGA-111] Add slice, concate and split layers
+    * [SINGA-113] Model/Hybrid Partition Support
+
+
+  * Python binding
+    * [SINGA-108] Add Python wrapper to singa
+
+
+  * Predict-only mode
+    * [SINGA-85] Add functions for extracting features and test new data
+
+
+  * Integrate with third-party tools
+    * [SINGA-11] Start SINGA on Apache Mesos
+    * [SINGA-78] Use Doxygen to generate documentation
+    * [SINGA-89] Add Docker support
+
+
+  * Unit test
+    * [SINGA-95] Add make test after building
+
+
+  * Other improvment
+    * [SINGA-84] Header Files Rearrange
+    * [SINGA-93] Remove the asterisk in the log tcp://169.254.12.152:*:49152
+    * [SINGA-94] Move call to google::InitGoogleLogging() from Driver::Init() to main()
+    * [SINGA-96] Add Momentum to Cifar10 Example
+    * [SINGA-101] Add ll (ls -l) command in .bashrc file when using docker
+    * [SINGA-114] Remove short logs in tmp directory
+    * [SINGA-115] Print layer debug information in the neural net graph file
+    * [SINGA-118] Make protobuf LayerType field id easy to assign
+    * [SIGNA-97] Add HDFS Store
+
+
+  * Bugs fixed
+    * [SINGA-85] Fix compilation errors in examples
+    * [SINGA-90] Miscellaneous trivial bug fixes
+    * [SINGA-107] Error from loading pre-trained params for training stacked RBMs
+    * [SINGA-116] Fix a bug in InnerProductLayer caused by weight matrix sharing
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.3.0.md
----------------------------------------------------------------------
diff --git a/doc/en/releases/RELEASE_NOTES_0.3.0.md b/doc/en/releases/RELEASE_NOTES_0.3.0.md
new file mode 100644
index 0000000..c169e12
--- /dev/null
+++ b/doc/en/releases/RELEASE_NOTES_0.3.0.md
@@ -0,0 +1,37 @@
+#singa-incubating-0.3.0 Release Notes
+
+---
+
+SINGA is a general distributed deep learning platform for training big deep
+learning models over large datasets. It is designed with an intuitive
+programming model based on the layer abstraction. SINGA supports a wide variety
+of popular deep learning models.
+
+This release includes following features:
+
+  * GPU Support
+    * [SINGA-131] Implement and optimize hybrid training using both CPU and GPU
+    * [SINGA-136] Support cuDNN v4
+    * [SINGA-134] Extend SINGA to run over a GPU cluster
+    * [Singa-157] Change the priority of cudnn library and install libsingagpu.so
+
+  * Remove Dependences
+    * [SINGA-156] Remove the dependency on ZMQ for single process training
+    * [SINGA-155] Remove zookeeper for single-process training
+
+  * Python Binding
+    * [SINGA-126] Python Binding for Interactive Training
+
+  * Other Improvements
+    * [SINGA-80] New Blob Level and Address Level Math Operation Interface
+    * [SINGA-130] Data Prefetching
+    * [SINGA-145] New SGD based optimization Updaters: AdaDelta, Adam, AdamMax
+
+  * Bugs Fixed
+    * [SINGA-148] Race condition between Worker threads and Driver
+    * [SINGA-150] Mesos Docker container failed
+    * [SIGNA-141] Undesired Hash collision when locating process id to worker\u2026
+    * [SINGA-149] Docker build fail
+    * [Singa-143] The compilation cannot detect libsingagpu.so file
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/zh/conf.py
----------------------------------------------------------------------
diff --git a/doc/zh/conf.py b/doc/zh/conf.py
index 332a0d1..921a27a 100755
--- a/doc/zh/conf.py
+++ b/doc/zh/conf.py
@@ -19,7 +19,7 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../build/python'))
+sys.path.insert(1, os.path.abspath('../../build/python'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 83771c2..137df80 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -98,9 +98,9 @@ def get_lr(epoch):
 
 
 def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
-          num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'):
+          num_stacks=1, dropout=0.5, model_path='model.bin'):
     # SGD with L2 gradient normalization
-    opt = optimizer.SGD(constraint=optimizer.L2Constraint(5))
+    opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
     rnn = layer.LSTM(
         name='lstm',
@@ -126,7 +126,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
     dense_b = dense.param_values()[1]
     print 'dense w ', dense_w.shape
     print 'dense b ', dense_b.shape
-    initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
+    initializer.uniform(dense_w, dense_w.shape[0], 0)
     print 'dense weight l1 = %f' % (dense_w.l1())
     dense_b.set_value(0)
     print 'dense b l1 = %f' % (dense_b.l1())
@@ -154,6 +154,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
                 lvalue = lossfun.forward(model_pb2.kTrain, act, label)
                 batch_loss += lvalue.l1()
                 grad = lossfun.backward()
+                grad /= batch_size
                 grad, gwb = dense.backward(model_pb2.kTrain, grad)
                 grads.append(grad)
                 g_dense_w += gwb[0]

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/README.md
----------------------------------------------------------------------
diff --git a/examples/cifar10/README.md b/examples/cifar10/README.md
new file mode 100644
index 0000000..5333e6f
--- /dev/null
+++ b/examples/cifar10/README.md
@@ -0,0 +1,69 @@
+# Train CNN over Cifar-10
+
+
+Convolution neural network (CNN) is a type of feed-forward artificial neural
+network widely used for image and video classification. In this example, we
+will train three deep CNN models to do image classification for the CIFAR-10 dataset,
+
+1. [AlexNet](https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg)
+the best validation accuracy (without data augmentation) we achieved was about 82%.
+
+2. [VGGNet](http://torch.ch/blog/2015/07/30/cifar.html), the best validation accuracy (without data augmentation) we achieved was about 89%.
+3. [ResNet](https://github.com/facebook/fb.resnet.torch), the best validation accuracy (without data augmentation) we achieved was about 83%.
+
+
+## Instructions
+
+
+### SINGA installation
+
+Users can compile and install SINGA from source or install the Python version.
+The code can ran on both CPU and GPU. For GPU training, CUDA and CUDNN (V4 or V5)
+are required. Please refer to the installation page for detailed instructions.
+
+
+
+### Training
+
+There are four training programs
+
+1. train.py. The following command would train the VGG model using the python
+version of the Cifar-10 dataset in 'cifar-10-batches-py' folder.
+
+        python train.py vgg cifar-10-batches-py
+
+    To train other models, please replace 'vgg' to 'alexnet' or 'resnet'. By default
+    the training would run on a CudaGPU device, to run it on CppCPU, add an additional
+    argument
+
+        python train.py vgg cifar-10-batches-py  --use_cpu
+
+2. alexnet.cc. It trains the AlexNet model using the CPP APIs on a CudaGPU,
+
+        run.sh
+
+3. alexnet-parallel.cc. It trains the AlexNet model using the CPP APIs on two CudaGPU devices.
+The two devices run synchronously to compute the gradients of the mode parameters, which are
+averaged on the host CPU device and then be applied to update the parameters.
+
+        run-parallel.sh
+
+4. vgg-parallel.cc. It train the VGG model using the CPP APIs on two CudaGPU devices similar to alexnet-parallel.cc.
+
+### Prediction
+
+predict.py includes the prediction function
+
+        def predict(net, images, dev, topk=5)
+
+The net is created by loading the previously trained model; Images consist of
+a numpy array of images (one row per image); dev is the training device, e.g.,
+a CudaGPU device or the host CppCPU device; topk labels of each image would be
+returned.
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index 9e8a7d8..8a506d2 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -161,7 +161,7 @@ void Train(int num_epoch, string data_dir) {
   auto net = CreateNet();
   SGD sgd;
   OptimizerConf opt_conf;
-  // opt_conf.set_momentum(0.9);
+  opt_conf.set_momentum(0.9);
   auto reg = opt_conf.mutable_regularizer();
   reg->set_coefficient(0.004);
   sgd.Setup(opt_conf);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 2091ee5..8f596e5 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -106,7 +106,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
-    opt = optimizer.SGD(momentum=0.9, decay=weight_decay)
+    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
     for (p, specs) in zip(net.param_names(), net.param_specs()):
         opt.register(p, specs)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 55c7cbb..0a00358 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -70,7 +70,6 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
     print "num_train_batch = %d " % (num_train_batch)
     for epoch in range(num_epoch):
         trainerrorsum = 0.0
-        validerrorsum = 0.0
         print 'Epoch %d' % epoch
         for b in range(num_train_batch):
             # positive phase

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 74e6ade..00380e0 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -234,6 +234,8 @@ class Nesterov(Optimizer):
 
     def apply_with_lr(self, epoch, lr, grad, value, name):
         self.apply_regularizer_constraint(epoch, value, grad, name)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 6c21034..593cfd6 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -22,7 +22,7 @@ ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
 ADD_DEPENDENCIES(test_singa singa_core singa_utils)
 #MESSAGE(STATUS "link libs" ${singa_linker_libs})
 TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
-    singa_io proto protobuf ${SINGA_LINKER_LIBS})
+    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
 IF(UNIX AND (NOT APPLE))
     LIST(APPEND LINK_FLAGS "-pthread")
 ENDIF()

[32/51] [abbrv] incubator-singa git commit: SINGA-223 Use Sphinx to create the website.

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/mail-lists.rst
----------------------------------------------------------------------
diff --git a/doc/en/community/mail-lists.rst b/doc/en/community/mail-lists.rst
new file mode 100644
index 0000000..02b39de
--- /dev/null
+++ b/doc/en/community/mail-lists.rst
@@ -0,0 +1,10 @@
+Project Mailing Lists
+=====================
+
+These are the mailing lists that have been established for this project. For each list, there is a subscribe, unsubscribe, and an archive link.
+
+.. csv-table:: Mailing Lists
+	:header: "Name", "Post", "Subscribe", "Unsubscribe", "Archive"
+
+        "Development", "dev@singa.incubator.apache.org", "`Subscribe <ma...@singa.incubator.apache.org>`_", "`Unsubscribe <ma...@singa.incubator.apache.org.>`_", "`mail-archives.apache.org <http://mail-archives.apache.org/mod_mbox/singa-dev/>`_"
+        "Commits", "commits@singa.incubator.apache.org", "`Subscribe <ma...@singa.incubator.apache.org>`_", "`Unsubscribe <ma...@singa.incubator.apache.org>`_", "`mail-archives.apache.org  <http://mail-archives.apache.org/mod_mbox/singa-commits/>`_"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/source-repository.md
----------------------------------------------------------------------
diff --git a/doc/en/community/source-repository.md b/doc/en/community/source-repository.md
new file mode 100644
index 0000000..8864629
--- /dev/null
+++ b/doc/en/community/source-repository.md
@@ -0,0 +1,22 @@
+# Source Repository
+
+___
+
+This project uses [Git](http://git-scm.com/) to manage its source code. Instructions on Git use can be found at [http://git-scm.com/documentation](http://git-scm.com/documentation).
+
+## Web Access
+
+The following is a link to the online source repository.
+
+* [https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary](https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary)
+
+
+## Upstream for committers
+
+Committers need to set the upstream endpoint to the Apache git (not github) repo address, e.g.,
+
+    $ git remote add asf https://git-wip-us.apache.org/repos/asf/incubator-singa.git
+
+Then you (committer) can push your code in this way,
+
+    $ git push asf <local-branch>:<remote-branch>

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/team-list.rst
----------------------------------------------------------------------
diff --git a/doc/en/community/team-list.rst b/doc/en/community/team-list.rst
new file mode 100644
index 0000000..a677aff
--- /dev/null
+++ b/doc/en/community/team-list.rst
@@ -0,0 +1,64 @@
+The SINGA Team
+==============
+
+A successful project requires many people to play many roles. Some members write code or documentation, while others are valuable as testers, submitting patches and suggestions.
+
+Mentors
+-------
+
+==================   ============
+Name                 Email
+==================   ============
+Daniel Dai           daijy@apache.org
+Ted Dunning	     tdunning@apache.org
+Alan Gates	     gates@apache.org
+Thejas Nair	     thejas@apache.org
+==================   ============
+
+Developers
+----------
+
++-------------------+--------------------------------+----------------------------------------------+
+| Name              |  Email                         |  Organization                                |
++-------------------+--------------------------------+----------------------------------------------+
+|Gang Chen          |  cg@zju.edu.cn                 |   Zhejiang University                        |
++-------------------+--------------------------------+----------------------------------------------+
+| Haibo Chen        | hzchenhaibo@corp.netease.com   |  NetEase                                     |
++-------------------+--------------------------------+----------------------------------------------+
+| Anh Dinh	    |     dinhtta@apache.org	     |         National University of Singapore     |                       
++-------------------+--------------------------------+----------------------------------------------+
+| Jinyang Gao	    |     jinyang@apache.org	     |         National University of Singapore	    |
++-------------------+--------------------------------+----------------------------------------------+
+| Xing Ji	    |         jixin@comp.nus.edu.sg  |          National University of Singapore    |
++-------------------+--------------------------------+----------------------------------------------+
+| Chonho Lee	    |  chonho@gmail.com              |   National University of Singapore           |
++-------------------+--------------------------------+----------------------------------------------+
+| Zhaojing Luo	    | zhaojing@apache.org	     | National University of Singapore	            |
++-------------------+--------------------------------+----------------------------------------------+
+| Beng Chin Ooi	    | ooibc@comp.nus.edu.sg          | National University of Singapore	            |
++-------------------+--------------------------------+----------------------------------------------+
+| Kian-Lee Tan	    |    tankl@apache.org            | National University of Singapore	            |
++-------------------+--------------------------------+----------------------------------------------+
+|Anthony K. H. Tung |  atung@comp.nus.edu.sg         |   National University of Singapore	    |
++-------------------+--------------------------------+----------------------------------------------+
+| Ji Wang	    |         wangji@comp.nus.edu.sg |	      National University of Singapore	    |
++-------------------+--------------------------------+----------------------------------------------+
+| Sheng Wang	    |    wangsh@apache.org           | National University of Singapore	            |
++-------------------+--------------------------------+----------------------------------------------+
+| Wei Wang	    |    wangwei@apache.org	     |         National University of Singapore	    |
++-------------------+--------------------------------+----------------------------------------------+
+| Yuan Wang         |  wangyuan@corp.netease.com     |   NetEase                                    |
++-------------------+--------------------------------+----------------------------------------------+
+| Wenfeng Wu	    |     wuwf@comp.nus.edu.sg       |  National University of Singapore            |
++-------------------+--------------------------------+----------------------------------------------+
+| Zhongle Xie	    |     zhongle@apache.org	     |        National University of Singapore      |
++-------------------+--------------------------------+----------------------------------------------+
+| Meihui Zhang	    |     meihui_zhang@sutd.edu.sg   |Singapore University of Technology and Design |
++-------------------+--------------------------------+----------------------------------------------+
+| Kaiping Zheng     |     kaiping@apache.org	     |         National University of Singapore	    |
++-------------------+--------------------------------+----------------------------------------------+
+| Ming Zhong        | hzzhongming15@corp.netease.com |   Zhejiang University                        |
++-------------------+--------------------------------+----------------------------------------------+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/conf.py
----------------------------------------------------------------------
diff --git a/doc/en/conf.py b/doc/en/conf.py
new file mode 100755
index 0000000..332a0d1
--- /dev/null
+++ b/doc/en/conf.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+#
+# incubator-singa documentation build configuration file, created by
+# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python'))
+
+# -- General configuration ------------------------------------------------
+from recommonmark.parser import CommonMarkParser
+
+source_parsers = {
+    '.md': CommonMarkParser,
+}
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ['.rst', '.md']
+
+# The encoding of source files.
+#
+source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'incubator-singa'
+copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
+author = u'moaz'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'1.0.0'
+# The full version, including alpha/beta/rc tags.
+release = u'1.0.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#
+# today = ''
+#
+# Else, today_fmt is used as the format for a strftime call.
+#
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+# keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+# html_theme_path = []
+
+# The name for this set of Sphinx documents.
+# "<project> v<release> documentation" by default.
+#
+# html_title = u'Singa v1.0.0'
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#
+html_logo = 'image/singa.png'
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['../_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#
+# html_extra_path = []
+
+# If not None, a 'Last updated on:' timestamp is inserted at every page
+# bottom, using the given strftime format.
+# The empty string is equivalent to '%b %d, %Y'.
+#
+# html_last_updated_fmt = None
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+#
+# html_domain_indices = True
+
+# If false, no index is generated.
+#
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#
+html_show_sourcelink = False
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
+#
+# html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# 'ja' uses this config value.
+# 'zh' user can custom change `jieba` dictionary path.
+#
+# html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#
+# html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Singadoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+     # The paper size ('letterpaper' or 'a4paper').
+     #
+     # 'papersize': 'letterpaper',
+
+     # The font size ('10pt', '11pt' or '12pt').
+     #
+     # 'pointsize': '10pt',
+
+     # Additional stuff for the LaTeX preamble.
+     #
+     # 'preamble': '',
+
+     # Latex figure (float) alignment
+     #
+     # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
+     u'moaz', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+#
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#
+# latex_appendices = []
+
+# If false, no module index is generated.
+#
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     author, 'incubator-singa', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+#
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#
+# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/contribute-code.md
----------------------------------------------------------------------
diff --git a/doc/en/develop/contribute-code.md b/doc/en/develop/contribute-code.md
new file mode 100644
index 0000000..98e5aee
--- /dev/null
+++ b/doc/en/develop/contribute-code.md
@@ -0,0 +1,60 @@
+## How to Contribute Code
+
+_____
+
+### Coding Style
+
+The SINGA codebase follows the [Google C++ Style Guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml).
+
+To check if your code follows the style, you can use the provided cpplint tool:
+    
+    $ ./tool/cpplint.py YOUR_FILE
+
+
+### JIRA format
+
+Like other Apache projects, SINGA uses JIRA to track bugs, improvements and
+other high-level discussions (e.g., system design and features).  Github pull requests are
+used for implementation discussions, e.g., code review and code merge.
+
+* Provide a descriptive Title.
+* Write a detailed Description. For bug reports, this should ideally include a
+  short reproduction of the problem. For new features, it may include a design
+  document.
+* Set [required fields](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-JIRA)
+
+### Pull Request
+
+The work flow is
+
+* Fork the [SINGA Github repository](https://github.com/apache/incubator-singa) to
+your own Github account.
+
+* Clone your fork, create a new branch (e.g., feature-foo or fixbug-foo),
+ work on it. After finishing your job,
+ [rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing) it to the
+ current latest master and push commits to your own Github account (the new
+ branch).
+
+* Open a pull request against the master branch of apache/incubator-singa.
+The PR title should be of the form SINGA-xxxx Title, where
+SINGA-xxxx is the relevant JIRA number, and Title may be the JIRA's title or a
+more specific title describing the PR itself, for example, "SINGA-6 Implement thread-safe singleton". Detailed description can be copied from the JIRA.
+Consider identifying committers or other contributors who have worked on the
+code being changed. Find the file(s) in Github and click "Blame" to see a
+line-by-line annotation of who changed the code last.  You can add @username in
+the PR description to ping them immediately.
+Please state that the contribution is your original work and that you license
+the work to the project under the project's open source license. Further commits (e.g., bug fix)
+to your new branch will be added to this pull request automatically by Github.
+
+* Wait for one committer to review the patch. If no conflicts, the committers will merge it with
+the master branch. The merge should a) not use rebase b) disable fast forward merge c) check the 
+commit message format and test the code/feature.
+
+* If there are too many small commit messages, you will be told to squash your commits into fewer meaningful
+commits. If your commit message does not follow the format (i.e., SINGA-xxxx), you will be told to
+reword your commit message. Both changes can be done using interactive git rebase. Once you
+get the commits corrected, push them to you own github again. Your pull request 
+will be automatically updated. For details, please refer to 
+[Rebase Pull Requests](https://github.com/edx/edx-platform/wiki/How-to-Rebase-a-Pull-Request).
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/contribute-docs.md
----------------------------------------------------------------------
diff --git a/doc/en/develop/contribute-docs.md b/doc/en/develop/contribute-docs.md
new file mode 100644
index 0000000..5e21a0f
--- /dev/null
+++ b/doc/en/develop/contribute-docs.md
@@ -0,0 +1,28 @@
+# How to Contribute Documentation
+
+___
+
+
+## Website
+This document gives step-by-step instructions for deploying [Singa website](http://singa.incubator.apache.org).
+
+Singa website is built by [Sphinx](http://www.sphinx-doc.org) 1.4.4 from a source tree stored in git: https://github.com/apache/incubator-singa/tree/master/doc.
+
+To install Sphinx on Ubuntu:
+
+    $ apt-get install python-sphinx
+
+To install the markdown support for Sphinx:
+
+    $ pip install recommonmark
+
+You can build the website by executing the following command from the doc folder:
+
+    $ make html
+
+The procedure for contributing documentation is the same as [contributing code](contribute-code.html).
+
+
+## CPP API
+
+To generate docs, run "doxygen" from the doc folder (Doxygen >= 1.8 recommended)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/how-contribute.md
----------------------------------------------------------------------
diff --git a/doc/en/develop/how-contribute.md b/doc/en/develop/how-contribute.md
new file mode 100644
index 0000000..8687b5a
--- /dev/null
+++ b/doc/en/develop/how-contribute.md
@@ -0,0 +1,11 @@
+# How to Contribute to SINGA
+
+___
+
+As with any open source project, there are several ways you can help:
+
+* Join the [mailing list](../community/mail-lists.html) and answer other user's questions.
+* [Build Singa](../quick-start.html) for yourself, in order to fix bugs.
+* Report bugs, feature requests and other issues in the [issue tracking](../community/issue-tracking.html) application.
+* Check SINGA's [development schedule](schedule.html) and [contribute code](contribute-code.html) by providing patches.
+* [Help with the documentation](contribute-docs.html) by updating webpages that are lacking or unclear.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/schedule.rst
----------------------------------------------------------------------
diff --git a/doc/en/develop/schedule.rst b/doc/en/develop/schedule.rst
new file mode 100644
index 0000000..2afe54f
--- /dev/null
+++ b/doc/en/develop/schedule.rst
@@ -0,0 +1,40 @@
+Development Schedule
+====================
+
+.. csv-table::
+	:header: "Release", "Module", "Feature", "Status"
+
+	" 0.1 Sep 2015     "," Neural Network          "," Feed forward neural network, including CNN, MLP                                                                 "," done  "
+	"                  ","                         "," RBM-like model, including RBM                                                                                   "," done   "
+	"                  ","                         "," Recurrent neural network, including standard RNN                                                                "," done   "
+	"                  ","  Architecture           "," One worker group on single node (with data partition)                                                           "," done   "
+	"                  ","                         "," Multi worker groups on single node using [Hogwild](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf)      ","done"
+	"                  ","                         "," Distributed Hogwild","done"
+	"                  ","                         "," Multi groups across nodes, like [Downpour](http://papers.nips.cc/paper/4687-large-scale-distributed-deep-networks) ","done"
+	"                  ","                         "," All-Reduce training architecture like [DeepImage](http://arxiv.org/abs/1501.02876) ","done"
+	"                  ","                         "," Load-balance among servers "," done"
+	"                  ","  Failure recovery       "," Checkpoint and restore ","done"
+	"                  ","  Tools                  "," Installation with GNU auto tools"," done"
+	"0.2 Jan 2016      "," Neural Network          "," Feed forward neural network, including AlexNet, cuDNN layers, etc."," done "
+	"                  ","                         "," Recurrent neural network, including GRULayer and BPTT","done "
+	"                  ","                         "," Model partition and hybrid partition","done"
+	"      		   "," Tools                   "," Integration with Mesos for resource management","done"
+	"         	   ","                         "," Prepare Docker images for deployment","done"
+	"              	   ","                         "," Visualization of neural net and debug information ","done"
+	"                  "," Binding                 "," Python binding for major components ","done"
+	"                  "," GPU                     "," Single node with multiple GPUs ","done"
+	"0.3 April 2016    "," GPU                     "," Multiple nodes, each with multiple GPUs","done"
+	"                  ","                         "," Heterogeneous training using both GPU and CPU [CcT](http://arxiv.org/abs/1504.04343)","done"
+	"                  ","                         "," Support cuDNN v4 "," done"
+	"                  "," Installation            "," Remove dependency on ZeroMQ, CZMQ, Zookeeper for single node training","done"
+	"                  "," Updater                 "," Add new SGD updaters including Adam, AdamMax and AdaDelta","done"
+	"                  "," Binding                 "," Enhance Python binding for training","done"
+	"1.0 July 2016     "," Programming abstraction ","Tensor with linear algebra, neural net and random operations "," "
+	"                  ","                         ","Updater for distributed parameter updating ",""
+	"                  "," Optimization            "," Execution and memory optimization",""
+	"                  "," Hardware                "," Use Cuda and Cudnn for Nvidia GPU",""
+	"                  ","                         "," Use OpenCL for AMD GPU or other devices",""
+	"                  "," Cross-platform          "," To extend from Linux to MacOS and Windows",""
+	"                  "," Examples                "," Speech recognition example",""
+	"                  ","                         ","Large image models, e.g., [GoogLeNet](http://arxiv.org/abs/1409.4842), [VGG](https://arxiv.org/pdf/1409.1556.pdf) and [Residual Net](http://arxiv.org/abs/1512.03385)",""
+	"     "," Rafiki                  "," Deep learning as a service "," "

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs.rst b/doc/en/docs.rst
new file mode 100644
index 0000000..400b12a
--- /dev/null
+++ b/doc/en/docs.rst
@@ -0,0 +1,6 @@
+Documentation
+=============
+
+.. toctree::
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/cnn.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/cnn.md b/doc/en/docs/cnn.md
new file mode 100755
index 0000000..21ef1f7
--- /dev/null
+++ b/doc/en/docs/cnn.md
@@ -0,0 +1,141 @@
+#Quickstart - Cifar10 example
+Convolution neural network (CNN) is a type of feed-forward artificial neural network widely used for image classification. In this example, we will use a deep CNN model to do image classification for the [CIFAR10 dataset](http://www.cs.toronto.edu/~kriz/cifar.html).
+
+## Running instructions for CPP version
+Please refer to [Installation](installation.html) page for how to install SINGA. Currently, we CNN requires CUDNN, hence both CUDA and CUDNN should be installed and SINGA should be compiled with CUDA and CUDNN.
+
+The Cifar10 dataset could be downloaded by running
+
+    # switch to cifar10 directory
+    $ cd ../examples/cifar10
+    # download data for CPP version
+    $ python download_data.py bin
+
+'bin' is for downloading binary version of Cifar10 data.
+
+During downloading, you should see the detailed output like
+
+     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
+     The tar file does exist. Extracting it now..
+     Finished!
+
+Now you have prepared the data for this Cifar10 example, the final step is to execute the `run.sh` script,
+
+    # in SINGA_ROOT/examples/cifar10/
+    $ ./run.sh
+
+You should see the detailed output as follows: first read the data files in order, show the statistics of training and testing data, then show the details of neural net structure with some parameter information, finally illustrate the performance details during training and validation process. The number of epochs can be specified in `run.sh` file.
+
+    Start training
+    Reading file cifar-10-batches-bin/data_batch_1.bin
+    Reading file cifar-10-batches-bin/data_batch_2.bin
+    Reading file cifar-10-batches-bin/data_batch_3.bin
+    Reading file cifar-10-batches-bin/data_batch_4.bin
+    Reading file cifar-10-batches-bin/data_batch_5.bin
+    Reading file cifar-10-batches-bin/test_batch.bin
+    Training samples = 50000, Test samples = 10000
+    conv1(32, 32, 32, )
+    pool1(32, 16, 16, )
+    relu1(32, 16, 16, )
+    lrn1(32, 16, 16, )
+    conv2(32, 16, 16, )
+    relu2(32, 16, 16, )
+    pool2(32, 8, 8, )
+    lrn2(32, 8, 8, )
+    conv3(64, 8, 8, )
+    relu3(64, 8, 8, )
+    pool3(64, 4, 4, )
+    flat(1024, )
+    ip(10, )
+    conv1_weight : 8.09309e-05
+    conv1_bias : 0
+    conv2_weight : 0.00797731
+    conv2_bias : 0
+    conv3_weight : 0.00795888
+    conv3_bias : 0
+    ip_weight : 0.00798683
+    ip_bias : 0
+    Messages will be appended to an existed file: train_perf
+    Messages will be appended to an existed file: val_perf
+    Epoch 0, training loss = 1.828369, accuracy = 0.329420, lr = 0.001000
+    Epoch 0, val loss = 1.561823, metric = 0.420600
+    Epoch 1, training loss = 1.465898, accuracy = 0.469940, lr = 0.001000
+    Epoch 1, val loss = 1.361778, metric = 0.513300
+    Epoch 2, training loss = 1.320708, accuracy = 0.529000, lr = 0.001000
+    Epoch 2, val loss = 1.242080, metric = 0.549100
+    Epoch 3, training loss = 1.213776, accuracy = 0.571620, lr = 0.001000
+    Epoch 3, val loss = 1.175346, metric = 0.582000
+
+The training details are stored in `train_perf` file in the same directory and the validation details in `val_perf` file.
+
+
+## Running instructions for Python version
+To run CNN example in Python version, we need to compile SINGA with Python binding,
+
+    $ mkdir build && cd build
+    $ cmake -DUSE_PYTHON=ON ..
+    $ make
+
+Now download the Cifar10 dataset,
+
+    # switch to cifar10 directory
+    $ cd ../examples/cifar10
+    # download data for Python version
+    $ python download_data.py py
+
+During downloading, you should see the detailed output like
+
+     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
+     The tar file does exist. Extracting it now..
+     Finished!
+
+Then execute the `train.py` script to build the model
+
+    $ python train.py
+
+You should see the output as follows including the details of neural net structure with some parameter information, reading data files, and the performance details during training and testing process.
+
+    (32L, 32L, 32L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 8L, 8L)
+    (32L, 8L, 8L)
+    (64L, 8L, 8L)
+    (64L, 8L, 8L)
+    (64L, 4L, 4L)
+    (1024L,)
+    Start intialization............
+    conv1_weight gaussian 7.938460476e-05
+    conv1_bias constant 0.0
+    conv2_weight gaussian 0.00793507322669
+    conv2_bias constant 0.0
+    conv3_weight gaussian 0.00799657031894
+    conv3_bias constant 0.0
+    dense_weight gaussian 0.00804364029318
+    dense_bias constant 0.0
+    Loading data ..................
+    Loading data file cifar-10-batches-py/data_batch_1
+    Loading data file cifar-10-batches-py/data_batch_2
+    Loading data file cifar-10-batches-py/data_batch_3
+    Loading data file cifar-10-batches-py/data_batch_4
+    Loading data file cifar-10-batches-py/data_batch_5
+    Loading data file cifar-10-batches-py/test_batch
+    Epoch 0
+    training loss = 1.881866, training accuracy = 0.306360 accuracy = 0.420000
+    test loss = 1.602577, test accuracy = 0.412200
+    Epoch 1
+    training loss = 1.536011, training accuracy = 0.441940 accuracy = 0.500000
+    test loss = 1.378170, test accuracy = 0.507600
+    Epoch 2
+    training loss = 1.333137, training accuracy = 0.519960 accuracy = 0.520000
+    test loss = 1.272205, test accuracy = 0.540600
+    Epoch 3
+    training loss = 1.185212, training accuracy = 0.574120 accuracy = 0.540000
+    test loss = 1.211573, test accuracy = 0.567600
+
+This script will call `alexnet.py` file to build the alexnet model. After the training is finished, SINGA will save the model parameters into a checkpoint file `model.bin` in the same directory. Then we can use this `model.bin` file for prediction.
+
+    $ python predict.py

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/device.rst b/doc/en/docs/device.rst
new file mode 100644
index 0000000..e79d87a
--- /dev/null
+++ b/doc/en/docs/device.rst
@@ -0,0 +1,38 @@
+Device
+=======
+
+
+The Device abstract represents any hardware device with memory and compuation units.
+All [Tensor operations](tensor.html) are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimization
+of memory and execution are implemented in the Device class.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implmentations,
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Python API
+----------
+
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
+
+
+The following code provides examples of creating devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/index.rst b/doc/en/docs/index.rst
new file mode 100644
index 0000000..93315de
--- /dev/null
+++ b/doc/en/docs/index.rst
@@ -0,0 +1,10 @@
+English
+=======
+
+.. toctree::
+
+   installation
+   software_stack
+   device
+   tensor
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/installation.md b/doc/en/docs/installation.md
new file mode 100755
index 0000000..8ab617f
--- /dev/null
+++ b/doc/en/docs/installation.md
@@ -0,0 +1,69 @@
+# Building SINGA from source
+
+## Dependencies
+
+### Required
+* Google Protobuf (>=2.5)
+* BLAS (tested with OpenBLAS >=0.2.10)
+* CUDA (tested with 6.5, 7.0 and 7.5)
+* CUDNN (v4 and v5)
+* cmake (>=2.6)
+
+Users must install the above mandatory libraries.
+Currently CUDA and CUDNN are also mandatory, but it would become optional later.
+
+### Optional
+* Glog
+* OpenCV (tested with 2.4.8)
+* LMDB (tested with 0.9)
+
+
+## Instructions
+
+Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
+
+
+    $ git clone https://github.com/apache/incubator-singa.git
+    $ cd incubator-singa/
+    # switch to dev branch
+    $ git checkout dev
+
+
+If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
+which could be downloaded as
+
+    $ git submodule init
+    $ git submodule update
+
+
+### Linux OS
+
+GCC (>=4.8.1) is required to compile SINGA on Linux OS.
+In SINGA_ROOT, execute the following commands for compiling SINGA,
+
+    $ mkdir build && cd build
+    # generate Makefile for compilation
+    $ cmake ..
+    # compile SINGA
+    $ make
+
+Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
+
+    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
+    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
+
+You can use `ccmake ..` to configure the compilation options including using
+LMDB, GLOG, etc.
+
+After compiling SINGA, you can run the unit tests by
+
+    $ ./bin/test_singa
+
+You can see all the testing cases with testing results. If SINGA passes all
+tests, then you have successfully installed SINGA. Please proceed to try the examples!
+
+
+### MacOS
+
+
+### Windows

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/neural-net.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/neural-net.md b/doc/en/docs/neural-net.md
new file mode 100644
index 0000000..c10baf8
--- /dev/null
+++ b/doc/en/docs/neural-net.md
@@ -0,0 +1,327 @@
+# Neural Net
+
+---
+
+`NeuralNet` in SINGA represents an instance of user's neural net model. As the
+neural net typically consists of a set of layers, `NeuralNet` comprises
+a set of unidirectionally connected [Layer](layer.html)s.
+This page describes how to convert an user's neural net into
+the configuration of `NeuralNet`.
+
+<img src="../_static/images/model-category.png" align="center" width="200px"/>
+<span><strong>Figure 1 - Categorization of popular deep learning models.</strong></span>
+
+## Net structure configuration
+
+Users configure the `NeuralNet` by listing all layers of the neural net and
+specifying each layer's source layer names. Popular deep learning models can be
+categorized as Figure 1. The subsequent sections give details for each
+category.
+
+### Feed-forward models
+
+<div align = "left">
+<img src="../_static/images/mlp-net.png" align="center" width="200px"/>
+<span><strong>Figure 2 - Net structure of a MLP model.</strong></span>
+</div>
+
+Feed-forward models, e.g., CNN and MLP, can easily get configured as their layer
+connections are undirected without circles. The
+configuration for the MLP model shown in Figure 1 is as follows,
+
+    net {
+      layer {
+        name : 'data"
+        type : kData
+      }
+      layer {
+        name : 'image"
+        type : kImage
+        srclayer: 'data'
+      }
+      layer {
+        name : 'label"
+        type : kLabel
+        srclayer: 'data'
+      }
+      layer {
+        name : 'hidden"
+        type : kHidden
+        srclayer: 'image'
+      }
+      layer {
+        name : 'softmax"
+        type : kSoftmaxLoss
+        srclayer: 'hidden'
+        srclayer: 'label'
+      }
+    }
+
+### Energy models
+
+<img src="../_static/images/rbm-rnn.png" align="center" width="500px"/>
+<span><strong>Figure 3 - Convert connections in RBM and RNN.</strong></span>
+
+
+For energy models including RBM, DBM,
+etc., their connections are undirected (i.e., Category B). To represent these models using
+`NeuralNet`, users can simply replace each connection with two directed
+connections, as shown in Figure 3a. In other words, for each pair of connected layers, their source
+layer field should include each other's name.
+The full [RBM example](rbm.html) has
+detailed neural net configuration for a RBM model, which looks like
+
+    net {
+      layer {
+        name : "vis"
+        type : kVisLayer
+        param {
+          name : "w1"
+        }
+        srclayer: "hid"
+      }
+      layer {
+        name : "hid"
+        type : kHidLayer
+        param {
+          name : "w2"
+          share_from: "w1"
+        }
+        srclayer: "vis"
+      }
+    }
+
+### RNN models
+
+For recurrent neural networks (RNN), users can remove the recurrent connections
+by unrolling the recurrent layer.  For example, in Figure 3b, the original
+layer is unrolled into a new layer with 4 internal layers. In this way, the
+model is like a normal feed-forward model, thus can be configured similarly.
+The [RNN example](rnn.html) has a full neural net
+configuration for a RNN model.
+
+
+## Configuration for multiple nets
+
+Typically, a training job includes three neural nets for
+training, validation and test phase respectively. The three neural nets share most
+layers except the data layer, loss layer or output layer, etc..  To avoid
+redundant configurations for the shared layers, users can uses the `exclude`
+filed to filter a layer in the neural net, e.g., the following layer will be
+filtered when creating the testing `NeuralNet`.
+
+
+    layer {
+      ...
+      exclude : kTest # filter this layer for creating test net
+    }
+
+
+
+## Neural net partitioning
+
+A neural net can be partitioned in different ways to distribute the training
+over multiple workers.
+
+### Batch and feature dimension
+
+<img src="../_static/images/partition_fc.png" align="center" width="400px"/>
+<span><strong>Figure 4 - Partitioning of a fully connected layer.</strong></span>
+
+
+Every layer's feature blob is considered a matrix whose rows are feature
+vectors. Thus, one layer can be split on two dimensions. Partitioning on
+dimension 0 (also called batch dimension) slices the feature matrix by rows.
+For instance, if the mini-batch size is 256 and the layer is partitioned into 2
+sub-layers, each sub-layer would have 128 feature vectors in its feature blob.
+Partitioning on this dimension has no effect on the parameters, as every
+[Param](param.html) object is replicated in the sub-layers. Partitioning on dimension
+1 (also called feature dimension) slices the feature matrix by columns. For
+example, suppose the original feature vector has 50 units, after partitioning
+into 2 sub-layers, each sub-layer would have 25 units. This partitioning may
+result in [Param](param.html) object being split, as shown in
+Figure 4. Both the bias vector and weight matrix are
+partitioned into two sub-layers.
+
+
+### Partitioning configuration
+
+There are 4 partitioning schemes, whose configurations are give below,
+
+  1. Partitioning each singe layer into sub-layers on batch dimension (see
+  below). It is enabled by configuring the partition dimension of the layer to
+  0, e.g.,
+
+          # with other fields omitted
+          layer {
+            partition_dim: 0
+          }
+
+  2. Partitioning each singe layer into sub-layers on feature dimension (see
+  below).  It is enabled by configuring the partition dimension of the layer to
+  1, e.g.,
+
+          # with other fields omitted
+          layer {
+            partition_dim: 1
+          }
+
+  3. Partitioning all layers into different subsets. It is enabled by
+  configuring the location ID of a layer, e.g.,
+
+          # with other fields omitted
+          layer {
+            location: 1
+          }
+          layer {
+            location: 0
+          }
+
+
+  4. Hybrid partitioning of strategy 1, 2 and 3. The hybrid partitioning is
+  useful for large models. An example application is to implement the
+  [idea proposed by Alex](http://arxiv.org/abs/1404.5997).
+  Hybrid partitioning is configured like,
+
+          # with other fields omitted
+          layer {
+            location: 1
+          }
+          layer {
+            location: 0
+          }
+          layer {
+            partition_dim: 0
+            location: 0
+          }
+          layer {
+            partition_dim: 1
+            location: 0
+          }
+
+Currently SINGA supports strategy-2 well. Other partitioning strategies are
+are under test and will be released in later version.
+
+## Parameter sharing
+
+Parameters can be shared in two cases,
+
+  * sharing parameters among layers via user configuration. For example, the
+  visible layer and hidden layer of a RBM shares the weight matrix, which is configured through
+  the `share_from` field as shown in the above RBM configuration. The
+  configurations must be the same (except name) for shared parameters.
+
+  * due to neural net partitioning, some `Param` objects are replicated into
+  different workers, e.g., partitioning one layer on batch dimension. These
+  workers share parameter values. SINGA controls this kind of parameter
+  sharing automatically, users do not need to do any configuration.
+
+  * the `NeuralNet` for training and testing (and validation) share most layers
+  , thus share `Param` values.
+
+If the shared `Param` instances resident in the same process (may in different
+threads), they use the same chunk of memory space for their values. But they
+would have different memory spaces for their gradients. In fact, their
+gradients will be averaged by the stub or server.
+
+## Advanced user guide
+
+### Creation
+
+    static NeuralNet* NeuralNet::Create(const NetProto& np, Phase phase, int num);
+
+The above function creates a `NeuralNet` for a given phase, and returns a
+pointer to the `NeuralNet` instance. The phase is in {kTrain,
+kValidation, kTest}. `num` is used for net partitioning which indicates the
+number of partitions.  Typically, a training job includes three neural nets for
+training, validation and test phase respectively. The three neural nets share most
+layers except the data layer, loss layer or output layer, etc.. The `Create`
+function takes in the full net configuration including layers for training,
+validation and test.  It removes layers for phases other than the specified
+phase based on the `exclude` field in
+[layer configuration](layer.html):
+
+    layer {
+      ...
+      exclude : kTest # filter this layer for creating test net
+    }
+
+The filtered net configuration is passed to the constructor of `NeuralNet`:
+
+    NeuralNet::NeuralNet(NetProto netproto, int npartitions);
+
+The constructor creates a graph representing the net structure firstly in
+
+    Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions);
+
+Next, it creates a layer for each node and connects layers if their nodes are
+connected.
+
+    void NeuralNet::CreateNetFromGraph(Graph* graph, int npartitions);
+
+Since the `NeuralNet` instance may be shared among multiple workers, the
+`Create` function returns a pointer to the `NeuralNet` instance .
+
+### Parameter sharing
+
+ `Param` sharing
+is enabled by first sharing the Param configuration (in `NeuralNet::Create`)
+to create two similar (e.g., the same shape) Param objects, and then calling
+(in `NeuralNet::CreateNetFromGraph`),
+
+    void Param::ShareFrom(const Param& from);
+
+It is also possible to share `Param`s of two nets, e.g., sharing parameters of
+the training net and the test net,
+
+    void NeuralNet:ShareParamsFrom(NeuralNet* other);
+
+It will call `Param::ShareFrom` for each Param object.
+
+### Access functions
+`NeuralNet` provides a couple of access function to get the layers and params
+of the net:
+
+    const std::vector<Layer*>& layers() const;
+    const std::vector<Param*>& params() const ;
+    Layer* name2layer(string name) const;
+    Param* paramid2param(int id) const;
+
+
+### Partitioning
+
+
+#### Implementation
+
+SINGA partitions the neural net in `CreateGraph` function, which creates one
+node for each (partitioned) layer. For example, if one layer's partition
+dimension is 0 or 1, then it creates `npartition` nodes for it; if the
+partition dimension is -1, a single node is created, i.e., no partitioning.
+Each node is assigned a partition (or location) ID. If the original layer is
+configured with a location ID, then the ID is assigned to each newly created node.
+These nodes are connected according to the connections of the original layers.
+Some connection layers will be added automatically.
+For instance, if two connected sub-layers are located at two
+different workers, then a pair of bridge layers is inserted to transfer the
+feature (and gradient) blob between them. When two layers are partitioned on
+different dimensions, a concatenation layer which concatenates feature rows (or
+columns) and a slice layer which slices feature rows (or columns) would be
+inserted. These connection layers help making the network communication and
+synchronization transparent to the users.
+
+#### Dispatching partitions to workers
+
+Each (partitioned) layer is assigned a location ID, based on which it is dispatched to one
+worker. Particularly, the pointer to the `NeuralNet` instance is passed
+to every worker within the same group, but each worker only computes over the
+layers that have the same partition (or location) ID as the worker's ID.  When
+every worker computes the gradients of the entire model parameters
+(strategy-2), we refer to this process as data parallelism.  When different
+workers compute the gradients of different parameters (strategy-3 or
+strategy-1), we call this process model parallelism.  The hybrid partitioning
+leads to hybrid parallelism where some workers compute the gradients of the
+same subset of model parameters while other workers compute on different model
+parameters.  For example, to implement the hybrid parallelism in for the
+[DCNN model](http://arxiv.org/abs/1404.5997), we set `partition_dim = 0` for
+lower layers and `partition_dim = 1` for higher layers.
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/overview.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/overview.rst b/doc/en/docs/overview.rst
new file mode 100644
index 0000000..18ad62b
--- /dev/null
+++ b/doc/en/docs/overview.rst
@@ -0,0 +1,99 @@
+Introduction
+==============
+
+
+SINGA is a general distributed deep learning platform for training big deep
+learning models over large datasets. It is designed with an intuitive
+programming model based on the layer abstraction. A variety
+of popular deep learning models are supported, namely feed-forward models including
+convolutional neural networks (CNN), energy models like restricted Boltzmann
+machine (RBM), and recurrent neural networks (RNN). Many built-in layers are
+provided for users. SINGA architecture is
+sufficiently flexible to run synchronous, asynchronous and hybrid training
+frameworks.  SINGA
+also supports different neural net partitioning schemes to parallelize the
+training of large models, namely partitioning on batch dimension, feature
+dimension or hybrid partitioning.
+
+
+Goals
+-----
+
+As a distributed system, the first goal of SINGA is to have good scalability. In other
+words, SINGA is expected to reduce the total training time to achieve certain
+accuracy with more computing resources (i.e., machines).
+
+
+The second goal is to make SINGA easy to use.
+It is non-trivial for programmers to develop and train models with deep and
+complex model structures.  Distributed training further increases the burden of
+programmers, e.g., data and model partitioning, and network communication.  Hence it is essential to
+provide an easy to use programming model so that users can implement their deep
+learning models/algorithms without much awareness of the underlying distributed
+platform.
+
+Principles
+----------
+
+Scalability is a challenging research problem for distributed deep learning
+training. SINGA provides a general architecture to exploit the scalability of
+different training frameworks. Synchronous training frameworks improve the
+efficiency of one training iteration, and
+asynchronous training frameworks improve the convergence rate. Given a fixed budget
+(e.g., cluster size), users can run a hybrid framework that maximizes the
+scalability by trading off between efficiency and convergence rate.
+
+SINGA comes with a programming model designed based on the layer abstraction, which
+is intuitive for deep learning models.  A variety of
+popular deep learning models can be expressed and trained using this programming model.
+
+System overview
+---------------
+
+.. figure:: /image/sgd.png
+
+            Figure 1 - SGD flow
+
+Training a deep learning model is to find the optimal parameters involved in
+the transformation functions that generate good features for specific tasks.
+The goodness of a set of parameters is measured by a loss function, e.g.,
+`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the
+loss functions are usually non-linear and non-convex, it is difficult to get a
+closed form solution. Typically, people use the stochastic gradient descent
+(SGD) algorithm, which randomly
+initializes the parameters and then iteratively updates them to reduce the loss
+as shown in Figure 1.
+
+.. figure:: /image/overview.png
+
+           Figure 2 - SINGA overview
+
+SGD is used in SINGA to train
+parameters of deep learning models. The training workload is distributed over
+worker and server units as shown in Figure 2. In each
+iteration, every worker calls *TrainOneBatch* function to compute
+parameter gradients. *TrainOneBatch* takes a *NeuralNet* object
+representing the neural net, and visits layers of the *NeuralNet* in
+certain order. The resultant gradients are sent to the local stub that
+aggregates the requests and forwards them to corresponding servers for
+updating. Servers reply to workers with the updated parameters for the next
+iteration.
+
+
+Job submission
+--------------
+
+To submit a job in SINGA (i.e., training a deep learning model),
+users pass the job configuration to SINGA driver in the
+`main function <programming-guide.html>`_ . The job configuration
+specifies the four major components in Figure 2,
+
+  * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections;
+  * a `TrainOneBatch <train-one-batch.html>`_  algorithm which is tailored for different model categories;
+  * an `Updater <updater.html>`_  defining the protocol for updating parameters at the server side;
+  * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers.
+
+This process is like the job submission in Hadoop, where users configure their
+jobs in the main function to set the mapper, reducer, etc.
+In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users
+can configure their jobs with their own (or built-in) layer, updater, etc.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/software_stack.md b/doc/en/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/en/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; The
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, optimizers/initializer/metric/loss for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
+tensor operations on its execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA have implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents normal GPU card from both Nvidia and AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for tensor operations,
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users would create a device instance and pass it to create multiple
+tensor instances. When users call the Tensor functions, these function would invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
+including deep learning models and other traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware device
+using different programming languages. A new hardware device would be supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory could be implemented by Device, which
+manages both operation execution and memory malloc/free. More optimization details
+would be described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules.
+
+* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t the training loss functions.
+  They wraps the complex layer operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide variant methods of initializing
+  model parameters (stored in Tensor instances), following Uniform, Gaussian, etc.
+
+* [Loss](loss.html) and its subclasses defines the training objective loss functions.
+  Both functions of computing the loss values and computing the gradient of the prediction w.r.t the
+  objective loss are implemented. Example loss functions include squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide the function to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp etc.
+
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing.
+
+* Reader and its subclasses load string records from disk files
+* Writer and its subclasses write string records to disk files
+* Encoder and its subclasses encode Tensor instances into string records
+* Decoder and its subclasses decodes string records into Tensor instances
+* Endpoint represents a communication endpoint which provides functions for passing messages to each other.
+* Message represents communication messages between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/tensor.rst b/doc/en/docs/tensor.rst
new file mode 100644
index 0000000..87d26ea
--- /dev/null
+++ b/doc/en/docs/tensor.rst
@@ -0,0 +1,54 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without user
+awareness. Note that users need to make sure the tensor operands are
+allocated on the same device except copy functions.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implmentations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppGPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+There are two set of tensor functions,
+1. Tensor member functions, which would change the internal state of the Tensor instance.
+2. tensor module functions, which accepts Tensor instances as arguments and return
+Tensor instances.
+
+
+Create Tensor instances
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: singa.tensor.Tensor
+
+
+Tensor instances can be constructed from Numpy array,
+
+.. automodule:: singa.tensor
+   :members: from_numpy
+
+
+Set Tensor values
+~~~~~~~~~~~~~~~~~
+
+
+
+
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/downloads.md
----------------------------------------------------------------------
diff --git a/doc/en/downloads.md b/doc/en/downloads.md
new file mode 100644
index 0000000..31e7274
--- /dev/null
+++ b/doc/en/downloads.md
@@ -0,0 +1,67 @@
+## Download SINGA
+---
+
+* Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa)
+
+* v0.3.0 (20 April 2016):
+    * [Apache SINGA 0.3.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz)
+      [\[MD5\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz.md5)
+      [\[KEYS\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/KEYS)
+    * [Release Notes 0.3.0](releases/RELEASE_NOTES_0.3.0.html)
+    * New features and major updates,
+        * [Training on GPU cluster](v0.3.0/gpu.html) enables training of deep learning models over a GPU cluster.
+        * [Python wrapper improvement](v0.3.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
+        * [New SGD updaters](v0.3.0/updater.html) are added, including Adam, AdaDelta and AdaMax.
+        * [Installation](v0.3.0/installation.html) has fewer dependent libraries for single node training.
+        * Heterogeneous training with CPU and GPU.
+        * Support cuDNN V4.
+        * Data prefetching.
+        * Fix some bugs.
+
+
+
+* v0.2.0 (14 January 2016):
+    * [Apache SINGA 0.2.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz)
+      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz.md5)
+      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/0.2.0/KEYS)
+    * [Release Notes 0.2.0](releases/RELEASE_NOTES_0.2.0.html)
+    * New features and major updates,
+        * [Training on GPU](v0.2.0/gpu.html) enables training of complex models on a single node with multiple GPU cards.
+        * [Hybrid neural net partitioning](v0.2.0/hybrid.html) supports data and model parallelism at the same time.
+        * [Python wrapper](v0.2.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
+        * [RNN model and BPTT algorithm](v0.2.0/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU.
+        * [Cloud software integration](v0.2.0/distributed-training.html) includes Mesos, Docker and HDFS.
+        * Visualization of neural net structure and layer information, which is helpful for debugging.
+        * Linear algebra functions and random functions against Blobs and raw data pointers.
+        * New layers, including SoftmaxLayer, ArgSortLayer, DummyLayer, RNN layers and cuDNN layers.
+        * Update Layer class to carry multiple data/grad Blobs.
+        * Extract features and test performance for new data by loading previously trained model parameters.
+        * Add Store class for IO operations.
+
+
+* v0.1.0 (8 October 2015):
+    * [Apache SINGA 0.1.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/apache-singa-incubating-0.1.0.tar.gz)
+      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/apache-singa-incubating-0.1.0.tar.gz.md5)
+      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/KEYS)
+    * [Amazon EC2 image](https://console.aws.amazon.com/ec2/v2/home?region=ap-southeast-1#LaunchInstanceWizard:ami=ami-b41001e6)
+    * [Release Notes 0.1.0](releases/RELEASE_NOTES_0.1.0.html)
+    * Major features include,
+        * Installation using GNU build utility
+        * Scripts for job management with zookeeper
+        * Programming model based on NeuralNet and Layer abstractions.
+        * System architecture based on Worker, Server and Stub.
+        * Training models from three different model categories, namely, feed-forward models, energy models and RNN models.
+        * Synchronous and asynchronous distributed training frameworks using CPU
+        * Checkpoint and restore
+        * Unit test using gtest
+
+**Disclaimer**
+
+Apache SINGA is an effort undergoing incubation at The Apache Software
+Foundation (ASF), sponsored by the name of Apache Incubator PMC. Incubation is
+required of all newly accepted projects until a further review indicates that
+the infrastructure, communications, and decision making process have stabilized
+in a manner consistent with other successful ASF projects. While incubation
+status is not necessarily a reflection of the completeness or stability of the
+code, it does indicate that the project has yet to be fully endorsed by the
+ASF.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/index.rst b/doc/en/index.rst
new file mode 100755
index 0000000..50c65d7
--- /dev/null
+++ b/doc/en/index.rst
@@ -0,0 +1,109 @@
+.. Singa documentation master file, created by
+   sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Apache Singa
+=======================
+
+Recent News
+-----------
+
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
+
+* The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
+
+* SINGA will be presented at `Strata+Hadoop <http://strataconf.com/big-data-conference-sg-2015/public/schedule/detail/45123>`_ on 2 Dec, 2015
+
+* SINGA was presented at `ACM Multimedia <http://www.acmmm.org/2015/at-a-glance/>`_ Best Paper session and Open Source Software Competition session, 26-30 Oct, 2015 (`Slides <files/mm2015.ppt>`_)
+
+* The **first release** is now available, 8 Oct, 2015. `Download SINGA v0.1.0 <downloads.html>`_.
+
+* SINGA was presented at `workshop on deep learning <http://www.comp.nus.edu.sg/~dbsystem/singa/workshop>`_  held on 16 Sep, 2015
+
+* SINGA was presented at `BOSS <http://boss.dima.tu-berlin.de/>`_ of `VLDB 2015 <http://www.vldb.org/2015/>`_ at Hawaii, 4 Sep, 2015. (slides: `overview <files/singa-vldb-boss.pptx>`_, `basic <files/basic-user-guide.pptx>`_, `advanced <files/advanced-user-guide.pptx>`_)
+
+* SINGA was presented at `ADSC/I2R Deep Learning Workshop <http://adsc.illinois.edu/contact-us>`_, 25 Aug, 2015.
+
+* A tutorial on SINGA was given at VLDB summer school at Tsinghua University,  25-31 July, 2015.
+
+* A half day tutorial on SINGA was given at I2R, 29 June, 2015.
+
+* SINGA was presented at `DanaC <http://danac.org/>`_ of `SIGMOD 2015 <http://www.sigmod2015.org/index.shtml>`_ at Melbourne, 31 May - 4 June, 2015.
+
+* SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
+
+Getting Started
+---------------
+* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
+
+* The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
+
+* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
+
+Documentation
+-------------
+
+* Documentations are listed `here <docs.html>`_.
+
+* Code API can be found `here <api/index.html>`_.
+
+* Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
+
+How to contribute
+----------------------
+
+* Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
+
+* If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
+
+* You can also contact with `SINGA committers <community.html>`_ directly.
+
+More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
+
+Citing SINGA
+------------
+
+Please cite the following two papers if you use SINGA in your research:
+
+* B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
+
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
+
+.. toctree::
+   :hidden:
+
+   downloads
+   docs
+
+.. toctree::
+   :hidden:
+   :maxdepth: 2
+   :caption: Development
+
+   develop/schedule
+   develop/how-contribute
+   develop/contribute-code
+   develop/contribute-docs
+
+.. toctree::
+   :hidden:
+   :maxdepth: 2
+   :caption: Community
+
+   community/source-repository
+   community/mail-lists
+   community/issue-tracking
+   community/team-list
+
+
+
+License
+----------
+SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
+
+Disclaimers
+-----------
+
+Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
deleted file mode 100755
index 50c65d7..0000000
--- a/doc/index.rst
+++ /dev/null
@@ -1,109 +0,0 @@
-.. Singa documentation master file, created by
-   sphinx-quickstart on Sat Jul  9 20:36:57 2016.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to Apache Singa
-=======================
-
-Recent News
------------
-
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
-
-* The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
-
-* SINGA will be presented at `Strata+Hadoop <http://strataconf.com/big-data-conference-sg-2015/public/schedule/detail/45123>`_ on 2 Dec, 2015
-
-* SINGA was presented at `ACM Multimedia <http://www.acmmm.org/2015/at-a-glance/>`_ Best Paper session and Open Source Software Competition session, 26-30 Oct, 2015 (`Slides <files/mm2015.ppt>`_)
-
-* The **first release** is now available, 8 Oct, 2015. `Download SINGA v0.1.0 <downloads.html>`_.
-
-* SINGA was presented at `workshop on deep learning <http://www.comp.nus.edu.sg/~dbsystem/singa/workshop>`_  held on 16 Sep, 2015
-
-* SINGA was presented at `BOSS <http://boss.dima.tu-berlin.de/>`_ of `VLDB 2015 <http://www.vldb.org/2015/>`_ at Hawaii, 4 Sep, 2015. (slides: `overview <files/singa-vldb-boss.pptx>`_, `basic <files/basic-user-guide.pptx>`_, `advanced <files/advanced-user-guide.pptx>`_)
-
-* SINGA was presented at `ADSC/I2R Deep Learning Workshop <http://adsc.illinois.edu/contact-us>`_, 25 Aug, 2015.
-
-* A tutorial on SINGA was given at VLDB summer school at Tsinghua University,  25-31 July, 2015.
-
-* A half day tutorial on SINGA was given at I2R, 29 June, 2015.
-
-* SINGA was presented at `DanaC <http://danac.org/>`_ of `SIGMOD 2015 <http://www.sigmod2015.org/index.shtml>`_ at Melbourne, 31 May - 4 June, 2015.
-
-* SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
-
-Getting Started
----------------
-* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
-
-* The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
-
-* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
-
-Documentation
--------------
-
-* Documentations are listed `here <docs.html>`_.
-
-* Code API can be found `here <api/index.html>`_.
-
-* Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
-
-How to contribute
-----------------------
-
-* Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
-
-* If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
-
-* You can also contact with `SINGA committers <community.html>`_ directly.
-
-More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
-
-Citing SINGA
-------------
-
-Please cite the following two papers if you use SINGA in your research:
-
-* B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
-
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
-
-.. toctree::
-   :hidden:
-
-   downloads
-   docs
-
-.. toctree::
-   :hidden:
-   :maxdepth: 2
-   :caption: Development
-
-   develop/schedule
-   develop/how-contribute
-   develop/contribute-code
-   develop/contribute-docs
-
-.. toctree::
-   :hidden:
-   :maxdepth: 2
-   :caption: Community
-
-   community/source-repository
-   community/mail-lists
-   community/issue-tracking
-   community/team-list
-
-
-
-License
-----------
-SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
-
-Disclaimers
------------
-
-Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/make.bat
----------------------------------------------------------------------
diff --git a/doc/make.bat b/doc/make.bat
deleted file mode 100644
index 624a328..0000000
--- a/doc/make.bat
+++ /dev/null
@@ -1,281 +0,0 @@
-@ECHO OFF
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=_build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
-	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
-	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
-	:help
-	echo.Please use `make ^<target^>` where ^<target^> is one of
-	echo.  html       to make standalone HTML files
-	echo.  dirhtml    to make HTML files named index.html in directories
-	echo.  singlehtml to make a single large HTML file
-	echo.  pickle     to make pickle files
-	echo.  json       to make JSON files
-	echo.  htmlhelp   to make HTML files and a HTML help project
-	echo.  qthelp     to make HTML files and a qthelp project
-	echo.  devhelp    to make HTML files and a Devhelp project
-	echo.  epub       to make an epub
-	echo.  epub3      to make an epub3
-	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
-	echo.  text       to make text files
-	echo.  man        to make manual pages
-	echo.  texinfo    to make Texinfo files
-	echo.  gettext    to make PO message catalogs
-	echo.  changes    to make an overview over all changed/added/deprecated items
-	echo.  xml        to make Docutils-native XML files
-	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
-	echo.  linkcheck  to check all external links for integrity
-	echo.  doctest    to run all doctests embedded in the documentation if enabled
-	echo.  coverage   to run coverage check of the documentation if enabled
-	echo.  dummy      to check syntax errors of document sources
-	goto end
-)
-
-if "%1" == "clean" (
-	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
-	del /q /s %BUILDDIR%\*
-	goto end
-)
-
-
-REM Check if sphinx-build is available and fallback to Python version if any
-%SPHINXBUILD% 1>NUL 2>NUL
-if errorlevel 9009 goto sphinx_python
-goto sphinx_ok
-
-:sphinx_python
-
-set SPHINXBUILD=python -m sphinx.__init__
-%SPHINXBUILD% 2> nul
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-:sphinx_ok
-
-
-if "%1" == "html" (
-	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
-	goto end
-)
-
-if "%1" == "dirhtml" (
-	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
-	goto end
-)
-
-if "%1" == "singlehtml" (
-	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
-	goto end
-)
-
-if "%1" == "pickle" (
-	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the pickle files.
-	goto end
-)
-
-if "%1" == "json" (
-	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the JSON files.
-	goto end
-)
-
-if "%1" == "htmlhelp" (
-	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
-	goto end
-)
-
-if "%1" == "qthelp" (
-	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
-	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Singa.qhcp
-	echo.To view the help file:
-	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Singa.ghc
-	goto end
-)
-
-if "%1" == "devhelp" (
-	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished.
-	goto end
-)
-
-if "%1" == "epub" (
-	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub file is in %BUILDDIR%/epub.
-	goto end
-)
-
-if "%1" == "epub3" (
-	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
-	goto end
-)
-
-if "%1" == "latex" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdf" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdfja" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf-ja
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "text" (
-	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The text files are in %BUILDDIR%/text.
-	goto end
-)
-
-if "%1" == "man" (
-	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The manual pages are in %BUILDDIR%/man.
-	goto end
-)
-
-if "%1" == "texinfo" (
-	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
-	goto end
-)
-
-if "%1" == "gettext" (
-	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
-	goto end
-)
-
-if "%1" == "changes" (
-	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.The overview file is in %BUILDDIR%/changes.
-	goto end
-)
-
-if "%1" == "linkcheck" (
-	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
-	goto end
-)
-
-if "%1" == "doctest" (
-	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
-	goto end
-)
-
-if "%1" == "coverage" (
-	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of coverage in the sources finished, look at the ^
-results in %BUILDDIR%/coverage/python.txt.
-	goto end
-)
-
-if "%1" == "xml" (
-	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The XML files are in %BUILDDIR%/xml.
-	goto end
-)
-
-if "%1" == "pseudoxml" (
-	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
-	goto end
-)
-
-if "%1" == "dummy" (
-	%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. Dummy builder generates no files.
-	goto end
-)
-
-:end

[20/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Updated the comments of python files for autodoc to generate python APIs by Sphinx.

Fixed a bug in optimizer which ignored the momentum value


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/33992c90
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/33992c90
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/33992c90

Branch: refs/heads/master
Commit: 33992c90191021451c9286ad28ad6140b80a9bd9
Parents: bc822cd
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri Aug 12 14:45:41 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 23:46:17 2016 +0800

----------------------------------------------------------------------
 cmake/Dependencies.cmake        |   5 +-
 doc/conf.py                     |  14 +-
 doc/docs/device.rst             |  29 +-
 doc/docs/index.rst              |   6 +
 doc/docs/initializer.rst        |  12 +
 doc/docs/layer.rst              |  14 +
 doc/docs/loss.rst               |   7 +
 doc/docs/metric.rst             |   8 +
 doc/docs/optimizer.rst          |  11 +
 doc/docs/tensor.md              |   7 -
 doc/docs/tensor.rst             |  30 ++
 doc/docs/utils.rst              |   6 +
 doc/index.rst                   |  28 +-
 examples/index.rst              |   6 +
 src/python/singa/device.py      |  31 ++
 src/python/singa/initializer.py |  86 ++++-
 src/python/singa/layer.py       | 417 ++++++++++++++----------
 src/python/singa/loss.py        | 105 +++++-
 src/python/singa/metric.py      |  49 ++-
 src/python/singa/optimizer.py   | 286 ++++++++--------
 src/python/singa/tensor.py      | 608 ++++++++++++++++++++++++++++++-----
 21 files changed, 1332 insertions(+), 433 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index eb729db..c03c81e 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -61,12 +61,13 @@ IF(USE_OPENCV)
     MESSAGE(STATUS "Found OpenCV_${OpenCV_VERSION} at ${OpenCV_INCLUDE_DIRS}")
     INCLUDE_DIRECTORIES(SYSTEM ${OpenCV_INCLUDE_DIRS})
     LIST(APPEND SINGA_LINKER_LIBS ${OpenCV_LIBRARIES})
-ENDIF()    
+ENDIF()
 
 #LIST(APPEND SINGA_LINKER_LIBS "/home/wangwei/local/lib/libopenblas.so")
 #MESSAGE(STATUS "link lib : " ${SINGA_LINKER_LIBS})
 
 IF(USE_PYTHON)
-    FIND_PACKAGE(PythonLibs REQUIRED)
+    FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
+    FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
     FIND_PACKAGE(SWIG 3.0 REQUIRED)
 ENDIF()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 20ba51a..9f52d16 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -19,7 +19,8 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, '../src/python/singa/')
+sys.path.insert(1, os.path.abspath('../build/python'))
+#autodoc_mock_imports = ['singa.device', 'singa.tensor', 'singa.layer']
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,9 +36,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-'sphinx.ext.autodoc'
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -50,7 +50,7 @@ source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 #
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
@@ -150,7 +150,7 @@ html_theme = 'sphinx_rtd_theme'
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -203,7 +203,7 @@ html_static_path = ['_static']
 
 # If true, links to the reST sources are added to the pages.
 #
-html_show_sourcelink = False
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
index aa5defb..53faf48 100644
--- a/doc/docs/device.rst
+++ b/doc/docs/device.rst
@@ -2,7 +2,10 @@ Device
 =======
 
 
-The Device abstract represent a hardware device with memory and compuation units.
+The Device abstract represents any hardware device with memory and compuation units.
+All [Tensor operations](tensor.html) are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimization
+of memory and execution are implemented in the Device class.
 
 Specific devices
 ----------------
@@ -13,24 +16,14 @@ Currently, SINGA has three Device implmentations,
 3. OpenclGPU for a GPU card which runs OpenCL code
 
 
-Create devices
----------------
-
 Python API
-~~~~~~~~~~
-
-.. autofunction:: device.create_cuda_gpus
-
-.. autofunction:: device.create_cuda_gpus_on
-
-.. autofunction:: device.create_cuda_gpu_on
-
-.. autofunction:: device.get_default_device
+----------
 
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
 
-The following code shows how to create devices,
 
-.. code:: python
+The following code provides examples of creating devices::
 
    from singa import device
    cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
@@ -39,9 +32,5 @@ The following code shows how to create devices,
    ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
 
 
-
 CPP API
-~~~~~~~
-
-
-
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 8a74976..2294054 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -6,4 +6,10 @@ English
    installation
    software_stack
    device
+   tensor
+   layer
+   initializer
+   loss
+   metric
+   optimizer
    examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
new file mode 100644
index 0000000..a190702
--- /dev/null
+++ b/doc/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+   :members:
+   :member-order: bysource
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/layer.rst b/doc/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/docs/loss.rst b/doc/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+   :members:
+   :show-inheritance:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/docs/metric.rst b/doc/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+   :members:
+   :show-inheritance:
+   :member-order: bysource

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/optimizer.rst b/doc/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
deleted file mode 100644
index eaf8362..0000000
--- a/doc/docs/tensor.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Tensor
-
-
-##
-
-
-##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..ff6142e
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,30 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without user
+awareness. Note that users need to make sure the tensor operands are
+allocated on the same device except copy functions.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implmentations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppGPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+
+.. automodule:: singa.tensor
+   :members:
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/docs/utils.rst b/doc/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+   :members:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
 =======================
 
 Recent News
-===========
+-----------
 
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_ 
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
 
 * The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
 
@@ -34,7 +34,7 @@ Recent News
 * SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
 
 Getting Started
-===============
+---------------
 * The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
 * Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
 
 Documentation
-=============
+-------------
 
 * Documentations are listed `here <docs.html>`_.
 
@@ -51,8 +51,8 @@ Documentation
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
-=================
-  
+----------------------
+
 * Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
 
 * If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
 More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
 
 Citing SINGA
-============
+------------
 
 Please cite the following two papers if you use SINGA in your research:
 
 * B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
 
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_). 
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
 
 .. toctree::
    :hidden:
-   
+
    downloads
    docs
 
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
    develop/how-contribute
    develop/contribute-code
    develop/contribute-docs
-   
+
 .. toctree::
    :hidden:
    :maxdepth: 2
    :caption: Community
-   
+
    community/source-repository
    community/mail-lists
    community/issue-tracking
    community/team-list
-   
+
 
 
 License
-=======
+----------
 SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
 
 Disclaimers
-===========
+-----------
 
 Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+   char-rnn/README
+   imagenet/README
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
 
 
 def create_cuda_gpus(num):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        num (int): number of device to create.
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
+
     return singa.Platform.CreateCudaGPUs(num)
 
 
 def create_cuda_gpu():
+    '''Create a single CudaGPU device.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
+
     return singa.Platform.CreateCudaGPUs(1)[0]
 
 
 def create_cuda_gpus_on(device_ids):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        device_ids (list): a list of GPU card IDs.
+
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
     return singa.Platform.CreateCudaGPUsOn(device_ids)
 
 
 def create_cuda_gpu_on(device_id):
+    '''Create a CudaGPU device on the given device ID.
+
+    Args:
+        device_id (int): GPU card ID.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
 
 def get_default_device():
+    '''Get the default host device which is a CppCPU device'''
     return singa.Platform.GetDefaultDevice()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 15caed3..277fd2f 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -15,29 +15,113 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-"""Popular initialization methods for parameter values (Tensor ojects)"""
+'''Popular initialization methods for parameter values (Tensor objects).
+
+Example usages::
+
+    from singa import tensor
+    from singa import initializer
+
+    x = tensor.Tensor((3, 5))
+    initializer.xavier(x)
+'''
 
 import math
 
 
+'''
+TODO(wangwei) update the uniform and gaussian initializers
+
+def uniform(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+    fan_out = nb_channel * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    training deep feedforward neuralnetworks.
+
+    assert fan_in >0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+    t.uniform(-x, x)
+
+
+def gaussian(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+    fan_out = nb_channel * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+
+    Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
+    Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
+
+    assert fan_in >0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+    t.gaussian(0, std)
+'''
+
+
 def uniform(t, low=0, high=1):
+    '''Initialize the parameter values following an Uniform distribution.
+
+    Args:
+        t (Tensor): the parater tensor
+        low (float): lower bound
+        high (float): higher bound
+    '''
     t.uniform(low, high)
 
 
 def gaussian(t, mean=0, std=0.01):
+    '''Initialize the parameter values following an Gaussian distribution.
+
+    Args:
+        t (Tensor): the parater tensor
+        mean (float): mean of the distribution
+        std (float): standard variance
+    '''
     t.gaussian(mean, std)
 
 
 def xavier(t):
+    '''Initialize the matrix parameter follow a Uniform distribution from
+    [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
+
+    Args:
+        t (Tensor): the parater tensor
+    '''
+
     scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1]))
     t.uniform(-scale, scale)
 
 
 def glorot(t):
+    '''Initialize the matrix parameter follow a Gaussian distribution with
+    mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
+
+    Args:
+        t (Tensor): the parater tensor
+    '''
     scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1]))
     t.gaussian(0, 1)
     t *= scale
 
 
 def msra(t):
+    '''Initialize the matrix parameter follow a Guassian distribution with
+    mean = 0, std = math.sqrt(2.0 / nb_row).
+
+    Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
+    nonlinearities.
+
+    Args:
+        t (Tensor): the parater tensor
+    '''
     t.gaussian(0, math.sqrt(2.0 / t.shape[0]))

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..0759716 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -14,7 +14,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-""" Python layers which wraps the C++ layers by providing easy to construct APIs
+""" Python layers wrap the C++ layers to provide simpler construction APIs.
+
+Example usages::
+
+    from singa import layer
+    from singa import tensor
+    from singa import device
+    from singa.model_pb2 import kTrain
+
+    layer.engine = 'cudnn'  # to use cudnn layers
+    dev = device.create_cuda_gpu()
+
+    # create a convolution layer
+    conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
+    conv.to_device(dev)  # move the layer data onto a CudaGPU device
+    x = tensor.Tensor((3, 32, 32), dev)
+    x.uniform(-1, 1)
+    y = conv.foward(kTrain, x)
+
+    dy = tensor.Tensor()
+    dy.reset_like(y)
+    dy.set_value(0.1)
+    # dp is a list of tensors for parameter gradients
+    dx, dp = conv.backward(kTrain, dy)
 """
 
 from sets import Set
@@ -22,23 +45,37 @@ from . import singa_wrap
 from .proto import model_pb2
 import tensor
 
-# engine could be 'cudnn', 'singa', which is used to create layers.
-# e.g., CudnnConvolution layer is identified by 'cudnn_convolution'
-# Convolution layer is identified by 'singa_convolution'
-# engine is case insensitive
+
 engine = 'cudnn'
+'''engine is the prefix of layer identifier.
+
+The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for
+layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively.
+For example, CudnnConvolution layer is identified by 'cudnn_convolution';
+'singacpp_convolution' is for Convolution layer;
+Some layers' implementation use only Tensor functions, thererfore they are
+transparent to the underlying devices. For threse layers, they would have
+multiple identifiers, e.g., singacpp_dropout, singacuda_dropout and
+singacl_dropout are all for the Dropout layer.
+
+engine is case insensitive. Each python layer would create the correct specific
+layer using the engine attribute.
+'''
 
 
 class Layer(object):
-    """Base Python layer class.
+    '''Base Python layer class.
 
-    Usages:
-        1.  construct layer without input_sample_shapes, goto 2;
-            construct layer with input_sample_shapes, goto 3;
+    Typically, the life cycle of a layer instance includes:
+        1. construct layer without input_sample_shapes, goto 2;
+           construct layer with input_sample_shapes, goto 3;
         2. call setup to create the parameters and setup other meta fields
         3. call forward or access layer members
         4. call backward and get parameters for update
-    """
+
+    Args:
+        name (str): layer name
+    '''
 
     def __init__(self, name, **kwargs):
         self.layer = None  # layer converted by swig
@@ -49,20 +86,24 @@ class Layer(object):
         self.has_setup = False
 
     def param_names(self):
+        '''
+        Returns:
+            a list of strings, one for the name of one parameter Tensor
+        '''
         names = []
         for x in self.param_specs:
             names.append(x['name'])
         return names
 
     def setup(self, in_shapes):
-        """Call the C++ setup function to create params and set some meta data.
+        '''Call the C++ setup function to create params and set some meta data.
 
         Args:
             in_shapes: if the layer accepts a single input Tensor, in_shapes is
                 a single tuple specifying the inpute Tensor shape; if the layer
                 accepts multiple input Tensor (e.g., the concatenation layer),
-                in_shapes is a tuple of tuples, each for one input Tensor shape
-        """
+                in_shapes is a tuple of tuples, each for one input Tensor
+        '''
         if self.has_setup:
             return
         self.layer.Setup(list(in_shapes),
@@ -70,54 +111,92 @@ class Layer(object):
         self.has_setup = True
 
     def get_output_sample_shape(self):
+        '''Called after setup to get the shape of the output sample(s).
+
+        Returns:
+            a tuple for a single output Tensor or a list of tuples if this layer
+            has multiple outputs
+        '''
         assert self.has_setup, \
             'Must call setup() before get_output_sample_shape()'
         return self.layer.GetOutputSampleShape()
 
     def param_values(self):
-        """Return param value tensors.
+        '''Return param value tensors.
 
-        Do not store these tensors as layer members because cpp Tensor could be
-        moved onto diff devices due to the change of layer device. However, the
-        py tensors would not update its internal cpp tensor automatically.
-        """
+        Parameter tensors are not stored as layer members because cpp Tensor
+        could be moved onto diff devices due to the change of layer device,
+        which would result in inconsistency.
+
+        Returns:
+            a list of tensors, one for each paramter
+        '''
         return tensor.from_raw_tensors(self.layer.param_values())
 
-    def forward(self, flag, input):
+    def forward(self, flag, x):
         '''Forward propagate through this layer.
 
         Args:
-            flag, kTrain or kEval
-            input, an input tensor
+            flag (int): kTrain or kEval
+            x (Tensor or list<Tensor>): an input tensor if the layer is
+                connected from a single layer; a list of tensors if the layer
+                is connected from multiple layers.
 
         Return:
-            a tensor for the transformed feature
+            a tensor if the layer is connected to a single layer; a list of
+            tensors if the layer is connected to multiple layers;
         '''
         assert self.has_setup, 'Must call setup() before forward()'
-        assert isinstance(input, tensor.Tensor), 'input must be py Tensor'
-        y = self.layer.Forward(flag, input.singa_tensor)
-        return tensor.from_raw_tensor(y)
+        if type(x) == list:
+            xs = []
+            for t in x:
+                x.append(t.singa_tensor)
+        else:
+            assert isinstance(input, tensor.Tensor), \
+                'input must be a Tensor or a list of Tensor'
+            xs = x
+        y = self.layer.Forward(flag, xs)
+        if type(y) == list:
+            return tensor.from_raw_tensors(y)
+        else:
+            return tensor.from_raw_tensor(y)
 
-    def backward(self, flag, grad):
-        '''Backward propagate through this layer.
+    def backward(self, flag, dy):
+        '''Backward propagate gradients through this layer.
 
         Args:
-            flag, for future use.
-            grad, gradient of the returned values of the forward function.
-
+            flag (int): for future use.
+            dy (Tensor or list<Tensor>): the gradient tensor(s) y w.r.t the
+                objective loss
         Return:
-            <dx, <dp1, dp2..>>, dx is the gradient of the input of the
-            forward function, dpi is the gradient of the i-th parameter
+            <dx, <dp1, dp2..>>, dx is a (set of) tensor(s) for the gradient of x
+            , dpi is the gradient of the i-th parameter
         '''
-        assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor'
-        ret = self.layer.Backward(flag, grad.singa_tensor)
-        return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1])
+        if type(dy) == list:
+            dys = []
+            for t in dy:
+                dys.append(t.singa_tensor)
+        else:
+            assert isinstance(dy, tensor.Tensor), \
+                'the input must be a Tensor or a set of Tensor'
+            dys = dy.singa_tensor
+        ret = self.layer.Backward(flag, dys)
+        if type(ret[0]) == list:
+            dxs = tensor.from_raw_tensors(ret[0])
+        else:
+            dxs = tensor.from_raw_tensor(ret[0])
+        return dxs, tensor.from_raw_tensors(ret[1])
 
     def to_device(self, device):
+        '''Move layer state tensors onto the given device.
+
+        Args:
+            device: swig converted device, created using singa.device
+        '''
         self.layer.ToDevice(device)
 
     def as_type(self, dtype):
-        self.layer.AsType(dtype)
+        pass
 
     def __copy__(self):
         pass
@@ -127,43 +206,42 @@ class Layer(object):
 
 
 class Conv2D(Layer):
+    """Construct a layer for 2D convolution.
 
+    Args:
+        nb_kernels (int): num of the channels (kernels) of the input Tensor
+        kernel: an integer or a pair of integers for kernel height and width
+        stride: an integer or a pair of integers for stride height and width
+        border_mode (string): padding mode, case in-sensitive,
+            'valid' -> padding is 0 for height and width
+            'same' -> padding is half of the kernel (floor), the kernel must be
+            odd number.
+        cudnn_prefer (string): the preferred algorithm for cudnn convolution
+            which could be 'fatest', 'autotune', 'limited_workspace' and
+            'no_workspace'
+        data_format (string): either 'NCHW' or 'NHWC'
+        use_bias (bool): True or False
+        pad: an integer or a pair of integers for padding height and width
+        W_specs (dict): used to specify the weight matrix specs, fields
+            include,
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            TODO(wangwei) 'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): hyper-parameters for bias vector, similar as W_specs
+        name (string): layer name.
+        input_sample_shape: 3d tuple for the shape of the input Tensor
+            without the batchsize, e.g., (channel, height, width) or
+            (height, width, channel)
+    """
     def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
                  cudnn_prefer='fatest', data_format='NCHW',
                  use_bias=True, W_specs=None, b_specs=None,
                  pad=None, input_sample_shape=None):
-        """Construct a layer for 2D convolution.
-
-        Args:
-            nb_kernels (int): num of the channels (kernels) of the input Tensor
-            kernel: an integer or a pair of integers for kernel height and width
-            stride: an integer or a pair of integers for stride height and width
-            border_mode (string): padding mode, case in-sensitive,
-                'valid' -> padding is 0 for height and width
-                'same' -> padding is half of the kernel (floor),
-                    the kernel must be odd number.
-            cudnn_prefer (string): the preferred algorithm for cudnn convolution
-                which could be 'fatest', 'autotune', 'limited_workspace' and
-                'no_workspace'
-            data_format (string): either 'NCHW' or 'NHWC'
-            use_bias (bool): True or False
-            pad: an integer or a pair of integers for padding height and width
-            W_specs (dict): used to specify the weight matrix specs, fields
-                include,
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                TODO(wangwei) 'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): hyper-parameters for bias vector, similar as W_specs
-            name (string): layer name.
-            input_sample_shape: 3d tuple for the shape of the input Tensor
-                without the batchsize, e.g., (channel, height, width) or
-                (height, width, channel)
-        """
         super(Conv2D, self).__init__(name)
         assert data_format == 'NCHW', 'Not supported data format: %s ' \
             'only "NCHW" is enabled currently' % (data_format)
@@ -195,19 +273,19 @@ class Conv2D(Layer):
 
 
 class Conv1D(Conv2D):
+    """Construct a layer for 1D convolution.
+
+    Most of the args are the same as those for Conv2D except the kernel,
+    stride, pad, which is a scalar instead of a tuple.
+    input_sample_shape is a tuple with a single value for the input feature
+    length
+    """
 
     def __init__(self, name, nb_kernels, kernel=3, stride=1,
                  border_mode='same', cudnn_prefer='fatest',
                  use_bias=True, W_specs={'init': 'Xavier'},
                  b_specs={'init': 'Constant', 'value': 0}, pad=None,
                  input_sample_shape=None):
-        """Construct a layer for 1D convolution.
-
-        Most of the args are the same as those for Conv2D except the kernel,
-        stride, pad, which is a scalar instead of a tuple.
-        input_sample_shape is a tuple with a single value for the input feature
-        length
-        """
         pad = None
         if pad is not None:
             pad = (0, pad)
@@ -227,7 +305,15 @@ class Conv1D(Conv2D):
 
 
 class Pooling2D(Layer):
+    '''2D pooling layer providing max/avg pooling.
+
+    All args are the same as those for Conv2D, except the following one
 
+    Args:
+        mode: pooling type, model_pb2.PoolingConf.MAX or
+            model_pb2.PoolingConf.AVE
+
+    '''
     def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
                  pad=None, data_format='NCHW', input_sample_shape=None):
         super(Pooling2D, self).__init__(name)
@@ -312,28 +398,26 @@ class AvgPooling1D(AvgPooling2D):
 
 
 class BatchNormalization(Layer):
-    # TODO(wangwei) add mode and epsilon arguments
+    """Batch-normalization.
 
+    Args:
+        momentum (float): for running average mean and variance.
+        beta_specs (dict): dictionary includes the fields for the beta
+            param:
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        gamma_specs (dict): similar to beta_specs, but for the gamma param.
+        name (string): layer name
+        input_sample_shape (tuple): with at least one integer
+    """
     def __init__(self, name, momentum=0.9,
                  beta_specs=None, gamma_specs=None, input_sample_shape=None):
-        """Batch-normalization.
-
-        Args:
-            momentum (float): for running average mean and variance.
-            beta_specs (dict): dictionary includes the fields for the beta
-                param:
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            gamma_specs (dict): similar to beta_specs, but for the gamma param.
-            name (string): layer name
-            input_sample_shape (tuple): with at least one integer
-        """
         super(BatchNormalization, self).__init__(name)
         conf = self.conf.batchnorm_conf
         conf.factor = momentum
@@ -362,16 +446,17 @@ class BatchNormalization(Layer):
 
 
 class LRN(Layer):
+    """Local response normalization.
+
+    Args:
+        size (int): # of channels to be crossed
+            normalization.
+        mode (string): 'cross_channel'
+        input_sample_shape (tuple): 3d tuple, (channel, height, width)
+    """
+
     def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
                  k=1, input_sample_shape=None):
-        """Local response normalization.
-
-        Args:
-            size (int): # of channels to be crossed
-                normalization.
-            mode (string): 'cross_channel'
-            input_sample_shape (tuple): 3d tuple, (channel, height, width)
-        """
         super(LRN, self).__init__(name)
         conf = self.conf.lrn_conf
         conf.local_size = size
@@ -388,29 +473,28 @@ class LRN(Layer):
 
 
 class Dense(Layer):
+    """Apply linear/affine transformation, also called inner-product or
+    fully connected layer.
 
+    Args:
+        num_output (int): output feature length.
+        use_bias (bool): add a bias vector or not to the transformed feature
+        W_specs (dict): specs for the weight matrix
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): specs for the bias vector, same fields as W_specs.
+        W_transpose (bool): if true, output=x*W.T+b;
+        input_sample_shape (tuple): input feature length
+    """
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
                  W_transpose=True, input_sample_shape=None):
-        """Apply linear/affine transformation, also called inner-product or
-        fully connected layer.
-
-        Args:
-            num_output (int): output feature length.
-            use_bias (bool): add a bias vector or not to the transformed feature
-            W_specs (dict): specs for the weight matrix
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): specs for the bias vector, same fields as W_specs.
-            W_transpose (bool): if true, output=x*W.T+b;
-            input_sample_shape (tuple): input feature length
-        """
         super(Dense, self).__init__(name)
         conf = self.conf.dense_conf
         conf.num_output = num_output
@@ -435,14 +519,14 @@ class Dense(Layer):
 
 
 class Dropout(Layer):
+    """Droput layer.
 
-    def __init__(self, name, p=0.5, input_sample_shape=None):
-        """Droput layer.
+    Args:
+        p (float): probability for dropping out the element, i.e., set to 0
+        name (string): layer name
+    """
 
-        Args:
-            p (float): probability for dropping out the element, i.e., set to 0
-            name (string): layer name
-        """
+    def __init__(self, name, p=0.5, input_sample_shape=None):
         super(Dropout, self).__init__(name)
         conf = self.conf.dropout_conf
         conf.dropout_ratio = p
@@ -456,15 +540,14 @@ class Dropout(Layer):
 
 
 class Activation(Layer):
+    """Activation layers.
 
+    Args:
+        name (string): layer name
+        mode (string): 'relu', 'sigmoid', or 'tanh'
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, mode='relu', input_sample_shape=None):
-        """Activation layers.
-
-        Args:
-            name (string): layer name
-            mode (string): 'relu', 'sigmoid', or 'tanh'
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Activation, self).__init__(name)
         self.conf.type = (engine + '_' + mode).lower()
         _check_engine(engine, ['cudnn', 'singa'])
@@ -474,15 +557,14 @@ class Activation(Layer):
 
 
 class Softmax(Layer):
+    """Apply softmax.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Apply softmax.
-
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
@@ -493,14 +575,14 @@ class Softmax(Layer):
 
 
 class Flatten(Layer):
+    """Reshape the input tensor into a matrix.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape for a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Reshape the input tensor into a matrix.
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape for a single sample
-        """
         super(Flatten, self).__init__(name)
         conf = self.conf.flatten_conf
         conf.axis = axis
@@ -511,26 +593,27 @@ class Flatten(Layer):
 
 
 class RNN(Layer):
+    '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
+
+    Args:
+        hidden_size: hidden feature size, the same for all stacks of layers.
+        rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru',
+            'tanh' and 'relu', refer to cudnn manual for each mode.
+        num_stacks: num of stacks of rnn layers. It is different to the
+            unrolling seqence length.
+        input_mode: 'linear' convert the input feature x by by a linear
+            transformation to get a feature vector of size hidden_size;
+            'skip' does nothing but requires the input feature size equals
+            hidden_size
+        bidirection: True for bidirectional RNN
+        param_specs: config for initializing the RNN parameters.
+        input_sample_shape: includes a single integer for the input sample
+            feature size.
+    '''
+
     def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
                  num_stacks=1, input_mode='linear', bidirectional=False,
                  param_specs=None, input_sample_shape=None):
-        '''Wrapper for singa::RNN class.
-
-        Args:
-            hidden_size, hidden feature size, the same for all stacks of layers.
-            rnn_mode, decides the rnn unit, which could be one of 'lstm', 'gru',
-                'tanh' and 'relu', refer to cudnn manual for each mode.
-            num_stacks, num of stacks of rnn layers. It is different to the
-                unrolling seqence length.
-            input_mode, 'linear' convert the input feature x by by a linear
-                transformation to get a feature vector of size hidden_size;
-                'skip' does nothing but requires the input feature size equals
-                hidden_size
-            bidirection, True for bidirectional RNN
-            param_specs, config for initializing the RNN parameters.
-            input_sample_shape, includes a single integer for the input sample
-                feature size.
-        '''
         super(RNN, self).__init__(name)
         conf = self.conf.rnn_conf
         assert hidden_size > 0, 'Hidden feature size must > 0'
@@ -605,7 +688,7 @@ class RNN(Layer):
 
         Returns:
             <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
-            the i-th input, its shape is (batch_size,
+                the i-th input, its shape is (batch_size,
                 input_feature_length). dhx is the gradient for the initial
                 hidden state. dcx is the gradient for the initial cell state,
                 which is valid only for lstm.
@@ -741,5 +824,7 @@ def _construct_param_specs_from_dict(specs):
 
 
 def get_layer_list():
-    """ Return a list of strings reprensenting the all supported layers"""
+    """ Return a list of strings which include the identifiers (tags) of all
+    supported layers
+    """
     return singa_wrap.GetRegisteredLayers()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py
index acfb813..c88290b 100644
--- a/src/python/singa/loss.py
+++ b/src/python/singa/loss.py
@@ -15,32 +15,127 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+
+'''
+Loss module includes a set of training loss implmentations. Some are converted
+from C++ implementation, and the rest are implemented directly using python
+Tensor.
+
+Example usage::
+
+    from singa import tensor
+    from singa import loss
+    from singa.proto import model_pb2
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly genearte the prediction activation
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = loss.SoftmaxCrossEntropy()
+    l = f.forward(model_pb2.kTrain, x, y)  # l is tensor with 3 loss values
+    g = f.backward()  # g is a tensor containing all gradients of x w.r.t l
+'''
+
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Loss(object):
+    '''Base loss class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited foward,
+    backward, and evaluate functions of this base class. Other subclasses need
+    to override these functions
+    '''
 
     def __init__(self):
         self.swig_loss = None
 
     def forward(self, flag, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the loss values.
+
+        Args:
+            flag (int): kTrain or kEval. If it is kTrain, then the backward
+                function must be called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truch Tensor, x.shape[0] must = y.shape[0]
+
+        Returns:
+            a tensor of floats for the loss values, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
 
     def backward(self):
-        """Return the grad of x w.r.t. the loss obj"""
+        '''
+        Returns:
+            the grad of x w.r.t. the loss
+        '''
         return tensor.from_raw_tensor(self.swig_loss.Backward())
 
-    def evaluate(self, flag, x, y):
-        """Return the averaged loss for all samples in x"""
+    def evaluate(self, flag, x, y):  # TODO(wangwei) remove flag
+        '''
+        Args:
+            flag (int): must be kEval, to be removed
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tnesor
+
+        Returns:
+            the averaged loss for all samples in x.
+        '''
         return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
 
 
 class SoftmaxCrossEntropy(Loss):
+    '''This loss function is a combination of SoftMax and Cross-Entropy loss.
+
+    It converts the inputs via SoftMax function and then
+    computes the cross-entropy loss against the ground truth values.
+    '''
 
     def __init__(self):
         self.swig_loss = singa.SoftmaxCrossEntropy()
+
+
+class SquaredError(Loss):
+    '''This loss evaluates the squared error between the prediction and the
+    truth values.
+
+    It is implemented using Python Tensor operations.
+    '''
+    def __init__(self):
+        super(Loss, SquaredError).__init__()
+        self.err = None
+
+    def forward(self, flag, x, y):
+        '''Compute the error as 0.5 * ||x-y||^2.
+
+        Args:
+            flag (int): kTrain or kEval; if kTrain, then the backward must be
+                called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the truth Tensor, an integer value per sample, whose
+                value is [0, x.shape[1])
+
+        Returns:
+            a Tensor with one error value per sample
+        '''
+        self.err = x - y
+        return 0.5 * tensor.squared(self.err)
+
+    def backward(self):
+        '''Compute the gradient of x w.r.t the error.
+
+        Returns:
+            x - y
+        '''
+        return self.err
+
+    def evaluate(self, flag, x, y):
+        '''Compuate the averaged error.
+
+        Returns:
+            a float value as the averaged error
+        '''
+        return tensor.sum(0.5 * tensor.squared(x - y)) / x.size()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/metric.py
----------------------------------------------------------------------
diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py
index 31b6892..3a5750d 100644
--- a/src/python/singa/metric.py
+++ b/src/python/singa/metric.py
@@ -15,28 +15,71 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of metric classes for evaluating the model's
+performance. The specific metric classes could be converted from C++
+implmentation or implemented directly using Python.
+
+
+Example usage::
+
+    from singa import tensor
+    from singa import metric
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly genearte the prediction activation
+    x = tensor.SoftMax(x)  # normalize the prediction into probabilities
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = metric.Accuracy()
+    acc = f.evaluate(x, y)  # averaged accuracy over all 3 samples in x
+
+'''
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Metric(object):
+    '''Base metric class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited foward,
+    and evaluate functions of this base class. Other subclasses need
+    to override these functions. Users need to feed in the **predictions** and
+    ground truth to get the metric values.
+    '''
 
     def __init__(self):
         self.swig_metric = None
 
     def forward(self, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the metric for each sample.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+
+        Returns:
+            a tensor of floats, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_metric.Forward(x.singa_tensor, y.singa_tensor))
 
     def evaluate(self, x, y):
-        """Return the averaged metric for all samples in x"""
+        '''Compute the averaged metric over all samples.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+        Returns:
+            a float value for the averaged metric
+        '''
         return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor)
 
 
 class Accuracy(Metric):
+    '''Compute the top one accuracy for singel label prediction tasks.
 
+    It calls the C++ functions to do the calculation.
+    '''
     def __init__(self):
         self.swig_metric = singa.Accuracy()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index aa6bdd1..a964f16 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -15,7 +15,22 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of optimizers for updating model parameters.
+
+Example usage::
+
+  from singa import optimizer
+  from singa import tensor
+
+  sgd = optimizer.SGD(lr=0.01, momentum=0.9, weight_decay=1e-4)
+  p = tensor.Tensor((3,5))
+  p.uniform(-1, 1)
+  g = tensor.Tensor((3,5))
+  g.gaussian(0, 0.01)
+
+  sgd.apply(1, g, p, 'param')  # use the global lr=0.1 for epoch 1
+  sgd.apply_with_lr(2, 0.03, g, p, 'param')  # use lr=0.03 for epoch 2
+'''
 
 from . import singa_wrap as singa
 import tensor
@@ -23,53 +38,44 @@ from proto import model_pb2
 
 
 class Optimizer(object):
-    """Base python optimizer.
-
-    Usages:
-        1. construct the optimizer
-        2. (optional) register each parameter with its specs.
-        3. use the optimizer to update parameter values given parameter
-            gradients and other optional info
-    """
-
+    '''The base python optimizer class.
+
+    Typically, an optimizer is used as follows:
+
+    1. construct the optimizer
+    2. (optional) register each parameter with its specs.
+    3. use the optimizer to update parameter values given parameter
+        gradients and other optional info
+
+    The subclasses should override the apply_with_lr function to do the real
+    parameter udpate.
+
+    Args:
+        lr (float): a constant for the learning rate, mutually exclusive with
+            'lr_gen'.
+        momentum (float): a constant for the momentum value
+        decay (float): the coefficent for L2 regularizer, which is mutually
+            exclusive with 'regularizer'.
+        lr_gen (function): a function returns the learning rate given
+            the current training step/epoch. It is mutually exclusive with lr.
+            If both are not set, the apply_with_lr function should be used for
+            param updating.
+        regularizer: an instance of Regularizer or RegularizerConf; If set,
+            regularization would be applied in apply_with_lr().
+            Users can also do regularization outside.
+        constraint: an instance of Constraint or ConstraintConf; If set,
+            constraint would be applied inside apply_with_lr(). Users can
+            also do regularization outside.
+    '''
     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
-                 momentum_gen=None, regularizer=None, constraint=None):
-        """Constructor.
-
-        Args:
-            lr: a constant or a function that generates learning rate given a
-                step, which is mutually exclusive with 'lr_gen'.
-            momentum: a constant or a function that generates the momentum value
-                given a step.
-            decay (float): the coefficent for L2 regularizer, which is mutually
-                exclusive with 'regularizer'.
-            lr_gen (function): a function returns the learning rate given
-                the current training step. It is mutually exclusive with lr. If
-                both are not set, the apply_with_lr function should be used for
-                param updating.
-            momentum_gen (function): a function returns the momentum value given
-                the current training step. It is mutually exclusive with
-                momentum.
-            regularizer: an instance of Regularizer or RegularizerConf; If set,
-                regularization would be applied in apply_with_lr().
-                Users can also do regularization outside.
-            constraint: an instance of Constraint or ConstraintConf; If set,
-                constraint would be applied inside apply_with_lr(). Users can
-                also do regularization outside.
-        """
+                 regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
 
-            def lr_gen(step):
+            def lr_gen(epoch):
                 return lr
         self.lr_gen = lr_gen
-        if momentum is not None:
-            assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\
-                ' at the same time'
-
-            def momentum_gen(step):
-                return momentum
-        self.momentum_gen = momentum_gen
+        self.momentum = momentum
         if decay is not None:
             assert regularizer is None, \
                 'Cannot set decay and regularizer at the same time'
@@ -94,14 +100,16 @@ class Optimizer(object):
         self.learning_rate_multiplier = {}
 
     def register(self, name, specs):
-        """Register the param specs, including creating regularizer and
+        '''Register the param specs, including creating regularizer and
         constraint per param object. Param specific regularizer and constraint
         have higher priority than the global ones.
 
         Args:
             name (str): parameter name
-            specs (ParamSpec): protobuf obj
-        """
+            specs (ParamSpec): protobuf obj, including regularizer and
+                constraint, multipliers for learning rate and weight decay.
+
+        '''
 	assert type(specs) == model_pb2.ParamSpec, \
 		'specs should be model_pb2.ParamSpec instance'
         if specs.HasField('regularizer'):
@@ -113,8 +121,8 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, step=None):
-        """Apply regularization and constraint if available.
+    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+        '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
         regularizer (constraint), it would use the param specific one.
@@ -123,46 +131,48 @@ class Optimizer(object):
             value (Tensor): parameter value Tensor
             grad (Tensor): parameter gradient Tensor
             name (string): to get parameter specific regularizer or constraint
-            step (int): some regularizer or constraint would use step
+            epoch (int): some regularizer or constraint would use epoch
 
-        Return:
+        Returns:
             the updated gradient Tensor
-        """
+        '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, step)
+            self.constraints[name].apply(value, grad, epoch)
         elif self.constraint is not None:
-            self.constraint.apply(step, value, grad)
+            self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, step)
+            self.regularizers[name].apply(value, grad, epoch)
         elif self.regularizer is not None:
-            self.regularizer.apply(step, value, grad)
+            self.regularizer.apply(epoch, value, grad)
         return grad
 
-    def apply_with_lr(self, step, lr, grad, value, name=None):
-        """Do update with given learning rate.
+    def apply_with_lr(self, epoch, lr, grad, value, name=None):
+        '''Do update with given learning rate.
 
         The subclass optimizer must override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): training epoch (could be iteration or epoch)
             lr (float): learning rate
             grad (Tensor): parameter gradient
             value (Tesnor): parameter value
             name (string): paramter name to retrieval parameter specific
                 updating rules (including regularizer and constraint)
 
-        Return:
+        Returns:
             updated parameter value
-        """
+        '''
         assert False, 'This is the base function, pls call the subclass func'
         return value
 
-    def apply(self, step, grad, value, name=None):
-        """Do update assume the learning rate generator is set.
+    def apply(self, epoch, grad, value, name=None):
+        '''Do update assuming the learning rate generator is set.
 
         The subclass optimizer does not need to override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): training epoch (could be iteration or epoch)
             grad (Tensor): parameter gradient
             value (Tesnor): parameter value
             name (string): paramter name to retrieval parameter specific
@@ -170,98 +180,109 @@ class Optimizer(object):
 
         Return:
             updated parameter value
-        """
-
+        '''
         assert self.lr_gen is not None, 'Learning rate generator is not set.'\
             'Either set the lr_gen in constructor or call apply_with_lr'
-        lr = self.lr_gen(step)
-        return self.apply_with_lr(step, lr, grad, value, name)
+        lr = self.lr_gen(epoch)
+        return self.apply_with_lr(epoch, lr, grad, value, name)
 
 
 class SGD(Optimizer):
+    '''The vallina Stochasitc Gradient Descent algorithm with momentum.
 
-    def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
-        """The vallina Stochasitc Gradient Descent algorithm.
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(SGD, self).__init__(lr, momentum, decay)
+    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                  constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        conf.momentum = self.momentum
+        conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Nesterov(Optimizer):
+    '''The SGD with Nesterov momentum.
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs):
-        """The SGD with Nesterov momentum
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
+    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                       constraint)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = momentum
+        conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class AdaGrad(Optimizer):
+    '''AdaGrad optimizer.
 
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """AdaGrad optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            epsilon (float): small number for preventing numeric error.
-        """
-        super(RMSProp, self).__init__(lr, decay, **kwargs)
+    Args:
+        epsilon (float): small number for preventing numeric error.
+    '''
+    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+                                      constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
+        conf.type = 'adagrad'
         self.opt = singa.CreateOptimizer('AdaGrad')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class RMSProp(Optimizer):
+    '''RMSProp optimizer.
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """RMSProp optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            rho (float): float within [0, 1]
-            epsilon (float): small value for preventing numeric error
-        """
-        super(RMSProp, self).__init__(lr, decay, kwargs)
+    Args:
+        rho (float): float within [0, 1]
+        epsilon (float): small value for preventing numeric error
+    '''
+
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+                                      constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho
         conf.delta = epsilon
         self.opt = singa.CreateOptimizer('RMSProp')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Regularizer(object):
-    """Base Python regularizer for parameter gradients.
-    """
+    '''Base Python regularizer for parameter gradients.'''
 
     def apply(self, value, grad):
         assert False, 'Not Implemented. Call the subclass function.'
@@ -269,34 +290,32 @@ class Regularizer(object):
 
 
 class CppRegularizer(Regularizer):
-    """Wrapper for regularizer implemented using C++.
-    """
+    '''Wrapper for regularizer implemented using C++.
 
-    def __init__(self, conf):
-        """Constructor.
+    Args:
+        conf (RegularizerConf): protobuf message for the configuration.
+    '''
 
-        Args:
-            conf (RegularizerConf): protobuf message for the configuration.
-        """
+    def __init__(self, conf):
         self.reg = singa.CreateRegularizer(conf.type)
         self.reg.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.reg.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Regularizer(Regularizer):
-    """L2 regularization"""
+    '''L2 regularization
+
+    Args:
+        coefficient (float): regularization coefficient.
+    '''
 
     def __init__(self, coefficient):
-        """
-        Args:
-            coefficient (float): regularization coefficient.
-        """
         self.coefficient = coefficient
 
-    def apply(self, step, value, grad, coefficient=None):
+    def apply(self, epoch, value, grad, coefficient=None):
         if coefficient is None:
             assert self.coefficient is not None, 'Must set the coefficient'
             coefficient = self.coefficient
@@ -305,39 +324,34 @@ class L2Regularizer(Regularizer):
 
 
 class Constraint(object):
-    """Base Python constraint class for paramter gradients.
-    """
+    '''Base Python constraint class for paramter gradients'''
 
-    def apply(self, step, value, grad):
+    def apply(self, epoch, value, grad):
         return grad
 
 
 class CppConstraint(Constraint):
-    """Wrapper for constraints implemented using C++.
-    """
+    '''Wrapper for constraints implemented using C++.
 
+    Args:
+        conf (ConstraintConf): protobuf message for the configuration.
+    '''
     def __init__(self, conf):
-        """Constructor.
-
-        Args:
-            conf (ConstraintConf): protobuf message for the configuration.
-        """
         self.constraint = singa.CreateConstraint(conf.type)
         self.constraint.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Constraint(Constraint):
-    """Rescale the gradient to make the L2 norm <= a given threshold.
-    """
+    '''Rescale the gradient to make the L2 norm <= a given threshold'''
 
     def __init__(self, threshold=None):
         self.threshold = threshold
 
-    def apply(self, step, value, grad, threshold=None):
+    def apply(self, epoch, value, grad, threshold=None):
         if threshold is None:
             assert self.threshold is not None, 'Must set the threshold'
             threshold = self.threshold

[26/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Update the layer identifier to be consistent with the documentation.
In particualr, the identifier prefix should be one of
['cudnn','singacpp', 'singacuda', 'singacl']. The complete identifier
would be <prefix>_xxxx, e.g., cudnn_convolution and
singacpp_convolution. The identifier should not have upper case letters.
If the implmentation is transparent to cpp/cuda/opencl, then register all
possible identifiers. For instance, Dropout is registered three times,
RegisterLayerClass("singacpp_dropout", Dropout)
RegisterLayerClass("singacl_dropout", Dropout)
RegisterLayerClass("singacuda_dropout", Dropout)


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9c71bd67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9c71bd67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9c71bd67

Branch: refs/heads/master
Commit: 9c71bd6745450019f9d4cbb748949142cf687616
Parents: cdd718e
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Aug 14 23:41:03 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 19:55:17 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py       |  2 +-
 examples/cifar10/alexnet.cc      |  2 +-
 examples/cifar10/alexnet.py      |  2 +-
 examples/cifar10/vgg-parallel.cc |  1 +
 examples/cifar10/vgg.py          |  4 ++--
 examples/imagenet/alexnet.cc     |  1 +
 examples/mnist/train.py          |  8 ++++----
 include/singa/core/device.h      | 15 +++++++-------
 include/singa/model/layer.h      | 13 +++++++++++-
 src/core/device/cpp_cpu.cc       |  2 +-
 src/model/layer/activation.cc    | 10 +++++++++
 src/model/layer/batchnorm.cc     |  3 +++
 src/model/layer/convolution.cc   |  1 +
 src/model/layer/dense.cc         |  3 +++
 src/model/layer/dropout.cc       |  3 +++
 src/model/layer/flatten.cc       |  3 +++
 src/model/layer/lrn.cc           |  3 +++
 src/model/layer/pooling.cc       |  1 +
 src/model/layer/prelu.cc         |  3 +++
 src/model/layer/rnn.cc           |  3 +++
 src/model/layer/softmax.cc       |  3 +++
 src/python/singa/device.py       |  4 ++--
 src/python/singa/layer.py        | 28 ++++++++++++++++----------
 src/python/singa/optimizer.py    | 38 ++++++++++++++++++-----------------
 src/python/singa/tensor.py       | 13 ++++++------
 src/python/swig/core_device.i    |  4 ++--
 test/python/test_layer.py        | 11 +++++-----
 test/singa/test_cpp_cpu.cc       |  2 +-
 test/singa/test_layer.cc         |  2 +-
 29 files changed, 123 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 1273a57..83771c2 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -128,7 +128,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
     print 'dense b ', dense_b.shape
     initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
-    dense_b.set_value(0.0)
+    dense_b.set_value(0)
     print 'dense b l1 = %f' % (dense_b.l1())
 
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index e1363e4..fa953f8 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -27,7 +27,7 @@
 #include "singa/utils/channel.h"
 #include "singa/utils/string.h"
 namespace singa {
-
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 34da95d..17b6a89 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -29,7 +29,7 @@ from singa import net as ffnet
 
 def create_net(use_cpu=False):
     if use_cpu:
-        layer.engine = 'singa'
+        layer.engine = 'singacpp'
 
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index 149cb21..90e9fce 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -34,6 +34,7 @@
 
 namespace singa {
 
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 const float default_wd  = 0.0005f;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index e8e3602..89c6fe8 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -38,7 +38,7 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
 
 def create_net(use_cpu=False):
     if use_cpu:
-        layer.engine = 'singa'
+        layer.engine = 'singacpp'
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
     net.add(layer.Dropout('drop1', 0.3))
@@ -84,7 +84,7 @@ def create_net(use_cpu=False):
             initializer.uniform(p, 0, 1)
         elif len(p.shape) > 1:
             if 'conv' in name:
-                p.gaussian(0, 0, 3 * 3 * p.shape[0])
+                initializer.gaussian(p, 0, 3 * 3 * p.shape[0])
             else:
                 p.gaussian(0, 0.02)
         else:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/imagenet/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet.cc b/examples/imagenet/alexnet.cc
index 26b2d96..4ac1130 100644
--- a/examples/imagenet/alexnet.cc
+++ b/examples/imagenet/alexnet.cc
@@ -33,6 +33,7 @@
 #include "singa/utils/timer.h"
 namespace singa {
 
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std, float bias = .0f) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 43b8e26..55c7cbb 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -85,7 +85,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             tposhidsample = tensor.gt(tposhidprob, tposhidrandom)
 
             # negative phase
-            tnegdata = tensor.mult(tposhidsample, tweight.transpose())
+            tnegdata = tensor.mult(tposhidsample, tweight.T())
             tnegdata.add_row(tvbias)
             tnegdata = tensor.sigmoid(tnegdata)
 
@@ -95,8 +95,8 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             error = tensor.sum(tensor.square((tdata - tnegdata)))
             trainerrorsum = error + trainerrorsum
 
-            tgweight = tensor.mult(tnegdata.transpose(), tneghidprob) -\
-                    tensor.mult(tdata.transpose(), tposhidprob)
+            tgweight = tensor.mult(tnegdata.T(), tneghidprob) -\
+                    tensor.mult(tdata.T(), tposhidprob)
             tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
             tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
 
@@ -115,7 +115,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
         initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
         tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
 
-        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.transpose())
+        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
         tvalidnegdata.add_row(tvbias)
         tvalidnegdata = tensor.sigmoid(tvalidnegdata)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index a564524..810d41f 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -152,6 +152,7 @@ class CppCPU : public Device {
 
   std::shared_ptr<Device> host() const override { return defaultDevice;}
   void SetRandSeed(unsigned seed) override;
+
  protected:
   void DoExec(function<void(Context*)>&& fn, int executor) override;
 
@@ -303,10 +304,15 @@ private:
 /// If CUDA or OPENCL are not enabled, then the respective related methods should
 /// return something that indicates their absence (for example, 0 devices);
 /// however they should always be available regardless of compile-time switches.
-#ifdef USE_CUDA
 class Platform {
 public:
 
+  /// Return the defualt host device
+  static std::shared_ptr<Device> GetDefaultDevice() {
+    return defaultDevice;
+  }
+
+#ifdef USE_CUDA
   /// Return the number of total available GPUs
   static int GetNumGPUs();
 
@@ -322,11 +328,6 @@ public:
   /// Return a string containing all hardware info, e.g., version, memory size.
   static const std::string DeviceQuery(int id, bool verbose = false);
 
-  /// Return the defualt host device
-  static std::shared_ptr<Device> GetDefaultDevice() {
-    return defaultDevice;
-  }
-
   /// Create a set of CudaGPU Device using 'num_devices' free GPUs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
@@ -334,6 +335,7 @@ public:
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+#endif // USE_CUDA
 
   /// Create a \p num_devices set of valid OpenCL devices, regardless of
   /// platforms.  If there are fewer valid devices than requested, then this
@@ -373,7 +375,6 @@ private:
 #endif  // USE_OPENCL
 };
 
-#endif // USE_CUDA
 
 }  // namespace singa
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index 58f0f4b..e67fcc5 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -222,6 +222,17 @@ class Layer {
   vector<ParamSpec> param_specs_;
 };
 
+/// Name should be formated as cudnn_xxx, singacpp_xxx, singacuda_xxx,
+/// singacl_xxx, where xxx is the real layer type, e.g., convolution, relu, etc.
+/// xxx should only have lower case letters.
+/// if the implmentation is transparent to cpp/cuda/opencl, then register all
+/// possible identifiers. For instance, Dropout is registered three times,
+/// RegisterLayerClass("singacpp_dropout", Dropout)
+/// RegisterLayerClass("singacl_dropout", Dropout)
+/// RegisterLayerClass("singacuda_dropout", Dropout)
+/// to be compatible with previous commits, the following identifier is
+/// registered. Better avoid using it, as it would be deprecated.
+/// RegisterLayerClass("singa_dropout", Dropout)
 #define RegisterLayerClass(Name, SubLayer) \
   static Registra<Layer, SubLayer> Name##SubLayer(#Name);
 
@@ -234,7 +245,7 @@ inline const std::vector<std::string> GetRegisteredLayers() {
   vector<std::string> ret;
   for (const string type : Factory<Layer>::GetIDs()) {
     auto layer = CreateLayer(type);
-    ret.push_back("Register type: " + type + " --> " + layer->layer_type());
+    ret.push_back("Register type: " + type);
   }
   return ret;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/core/device/cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/src/core/device/cpp_cpu.cc b/src/core/device/cpp_cpu.cc
index 2b3e63b..04209ab 100644
--- a/src/core/device/cpp_cpu.cc
+++ b/src/core/device/cpp_cpu.cc
@@ -22,7 +22,7 @@ namespace singa {
 
 std::shared_ptr<Device> defaultDevice=std::make_shared<CppCPU>();
 
-CppCPU::CppCPU() : Device(0, 1) {
+CppCPU::CppCPU() : Device(-1, 1) {
   lang_ = kCpp;
   //host_ = nullptr;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.cc b/src/model/layer/activation.cc
index aa40edb..eb90d87 100644
--- a/src/model/layer/activation.cc
+++ b/src/model/layer/activation.cc
@@ -25,6 +25,16 @@ RegisterLayerClass(singa_relu, Activation);
 RegisterLayerClass(singa_sigmoid, Activation);
 RegisterLayerClass(singa_tanh, Activation);
 
+RegisterLayerClass(singacpp_relu, Activation);
+RegisterLayerClass(singacuda_relu, Activation);
+RegisterLayerClass(singacl_relu, Activation);
+RegisterLayerClass(singacpp_sigmoid, Activation);
+RegisterLayerClass(singacuda_sigmoid, Activation);
+RegisterLayerClass(singacl_sigmoid, Activation);
+RegisterLayerClass(singacpp_tanh, Activation);
+RegisterLayerClass(singacuda_tanh, Activation);
+RegisterLayerClass(singacl_tanh, Activation);
+
 void Activation::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   auto pos = conf.type().find_first_of('_');

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index f348661..b345c6b 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -22,6 +22,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_batchnorm, BatchNorm);
+RegisterLayerClass(singacpp_batchnorm, BatchNorm);
+RegisterLayerClass(singacuda_batchnorm, BatchNorm);
+RegisterLayerClass(singacl_batchnorm, BatchNorm);
 void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 4fc209f..0d1751d 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -24,6 +24,7 @@ namespace singa {
 using std::vector;
 
 RegisterLayerClass(singa_convolution, Convolution);
+RegisterLayerClass(singacpp_convolution, Convolution);
 void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   ConvolutionConf conv_conf = conf.convolution_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 1a2d16e..7470154 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -24,6 +24,9 @@ namespace singa {
 using std::vector;
 
 RegisterLayerClass(singa_dense, Dense);
+RegisterLayerClass(singacpp_dense, Dense);
+RegisterLayerClass(singacuda_dense, Dense);
+RegisterLayerClass(singacl_dense, Dense);
 Dense::~Dense() {
   // delete weight_;
   // delete bias_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.cc b/src/model/layer/dropout.cc
index 35801b4..d7397a1 100644
--- a/src/model/layer/dropout.cc
+++ b/src/model/layer/dropout.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_dropout, Dropout);
+RegisterLayerClass(singacpp_dropout, Dropout);
+RegisterLayerClass(singacuda_dropout, Dropout);
+RegisterLayerClass(singacl_dropout, Dropout);
 void Dropout::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   dropout_ratio_ = conf.dropout_conf().dropout_ratio();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/flatten.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/flatten.cc b/src/model/layer/flatten.cc
index d89361e..561c310 100644
--- a/src/model/layer/flatten.cc
+++ b/src/model/layer/flatten.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_flatten, Flatten);
+RegisterLayerClass(singacpp_flatten, Flatten);
+RegisterLayerClass(singacuda_flatten, Flatten);
+RegisterLayerClass(singacl_flatten, Flatten);
 void Flatten::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   axis_ = conf.flatten_conf().axis();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/lrn.cc b/src/model/layer/lrn.cc
index 6b5a618..4fdb5c9 100644
--- a/src/model/layer/lrn.cc
+++ b/src/model/layer/lrn.cc
@@ -23,6 +23,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_lrn, LRN);
+RegisterLayerClass(singacpp_lrn, LRN);
+RegisterLayerClass(singacuda_lrn, LRN);
+RegisterLayerClass(singacl_lrn, LRN);
 void LRN::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index 5e7ba1d..23969da 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -21,6 +21,7 @@
 namespace singa {
 
 RegisterLayerClass(singa_pooling, Pooling);
+RegisterLayerClass(singacpp_pooling, Pooling);
 void Pooling::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   PoolingConf pool_conf = conf.pooling_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/prelu.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/prelu.cc b/src/model/layer/prelu.cc
index a20972c..e567172 100644
--- a/src/model/layer/prelu.cc
+++ b/src/model/layer/prelu.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_prelu, PReLU);
+RegisterLayerClass(singacpp_prelu, PReLU);
+RegisterLayerClass(singacuda_prelu, PReLU);
+RegisterLayerClass(singacl_prelu, PReLU);
 void PReLU::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/rnn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/rnn.cc b/src/model/layer/rnn.cc
index 524b462..b811f9d 100644
--- a/src/model/layer/rnn.cc
+++ b/src/model/layer/rnn.cc
@@ -23,6 +23,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_rnn, RNN);
+RegisterLayerClass(singacpp_rnn, RNN);
+RegisterLayerClass(singacuda_rnn, RNN);
+RegisterLayerClass(singacl_rnn, RNN);
 void RNN::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 6a49131..2cbd264 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -20,6 +20,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_softmax, Softmax);
+RegisterLayerClass(singacpp_softmax, Softmax);
+RegisterLayerClass(singacuda_softmax, Softmax);
+RegisterLayerClass(singacl_softmax, Softmax);
 void Softmax::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   CHECK_EQ(in_sample.size(), 1u);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index eff6783..897fdf5 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -113,7 +113,7 @@ def create_cuda_gpu_on(device_id):
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
-
+default_device = singa.Platform.GetDefaultDevice()
 def get_default_device():
     '''Get the default host device which is a CppCPU device'''
-    return singa.Platform.GetDefaultDevice()
+    return default_device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index 0759716..b0fdb5e 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -152,9 +152,9 @@ class Layer(object):
             for t in x:
                 x.append(t.singa_tensor)
         else:
-            assert isinstance(input, tensor.Tensor), \
+            assert isinstance(x, tensor.Tensor), \
                 'input must be a Tensor or a list of Tensor'
-            xs = x
+            xs = x.singa_tensor
         y = self.layer.Forward(flag, xs)
         if type(y) == list:
             return tensor.from_raw_tensors(y)
@@ -266,7 +266,7 @@ class Conv2D(Layer):
         self.conf.param.extend([bspecs])
         self.param_specs.append(bspecs)
 
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp'])
         self.layer = _create_layer(engine, 'Convolution')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -322,7 +322,7 @@ class Pooling2D(Layer):
         conf = self.conf.pooling_conf
         conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad)
         conf.pool = mode
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp'])
         self.layer = _create_layer(engine, 'Pooling')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -439,7 +439,7 @@ class BatchNormalization(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
         self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
         self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'BatchNorm')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -466,7 +466,7 @@ class LRN(Layer):
         # TODO(wangwei) enable mode = 'within_channel'
         assert mode == 'cross_channel', 'only support mode="across_channel"'
         conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'LRN')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -513,7 +513,10 @@ class Dense(Layer):
         self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
         self.param_specs.append(_construct_param_specs_from_dict(b_specs))
         # dense layer is transparent to engine.
-        self.layer = _create_layer('singa', 'Dense')
+        if engine == 'cudnn':
+            self.layer = _create_layer('singacuda', 'Dense')
+        else:
+            self.layer = _create_layer(engine, 'Dense')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
@@ -533,7 +536,7 @@ class Dropout(Layer):
         # 'cudnn' works for v>=5.0
         #  if engine.lower() == 'cudnn':
         #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -549,8 +552,8 @@ class Activation(Layer):
     """
     def __init__(self, name, mode='relu', input_sample_shape=None):
         super(Activation, self).__init__(name)
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.conf.type = (engine + '_' + mode).lower()
-        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, mode)
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -568,7 +571,7 @@ class Softmax(Layer):
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacl', 'singacuda'])
         self.layer = _create_layer(engine, 'Softmax')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -587,7 +590,10 @@ class Flatten(Layer):
         conf = self.conf.flatten_conf
         conf.axis = axis
         # fltten layer is transparent to engine
-        self.layer = _create_layer('singa', 'Flatten')
+        if engine == 'cudnn':
+            self.layer = _create_layer('singacuda', 'Flatten')
+        else:
+            self.layer = _create_layer(engine, 'Flatten')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 338c6b0..86e68af 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -54,7 +54,7 @@ class Optimizer(object):
         lr (float): a constant for the learning rate, mutually exclusive with
             'lr_gen'.
         momentum (float): a constant for the momentum value
-        decay (float): the coefficent for L2 regularizer, which is mutually
+        weight_decay (float): the coefficent for L2 regularizer, which is mutually
             exclusive with 'regularizer'.
         lr_gen (function): a function returns the learning rate given
             the current training step/epoch. It is mutually exclusive with lr.
@@ -67,7 +67,7 @@ class Optimizer(object):
             constraint would be applied inside apply_with_lr(). Users can
             also do regularization outside.
     '''
-    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
@@ -76,10 +76,10 @@ class Optimizer(object):
                 return lr
         self.lr_gen = lr_gen
         self.momentum = momentum
-        if decay is not None:
+        if weight_decay is not None:
             assert regularizer is None, \
-                'Cannot set decay and regularizer at the same time'
-            regularizer = L2Regularizer(decay)
+                'Cannot set weight_decay and regularizer at the same time'
+            regularizer = L2Regularizer(weight_decay)
         if regularizer is not None:
             if isinstance(regularizer, model_pb2.RegularizerConf):
                 self.regularizer = CppRegularizer(regularizer)
@@ -121,7 +121,7 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+    def apply_regularizer_constraint(self, epoch, value, grad, name=None):
         '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
@@ -137,12 +137,12 @@ class Optimizer(object):
             the updated gradient Tensor
         '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, epoch)
+            self.constraints[name].apply(epoch, value, grad)
         elif self.constraint is not None:
             self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, epoch)
+            self.regularizers[name].apply(epoch, value, grad)
         elif self.regularizer is not None:
             self.regularizer.apply(epoch, value, grad)
         return grad
@@ -193,12 +193,13 @@ class SGD(Optimizer):
     See the base Optimizer for all arguments.
     '''
 
-    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
                                   constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = self.momentum
+        if self.momentum is not None:
+            conf.momentum = self.momentum
         conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
@@ -215,12 +216,13 @@ class Nesterov(Optimizer):
     See the base Optimizer for all arguments.
     '''
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=0.9, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
                                        constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        if self.momentum is not None:
+            conf.momentum = momentum
         conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
@@ -239,9 +241,9 @@ class AdaGrad(Optimizer):
     Args:
         epsilon (float): small number for preventing numeric error.
     '''
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+    def __init__(self, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+        super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                       constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
@@ -265,9 +267,9 @@ class RMSProp(Optimizer):
         epsilon (float): small value for preventing numeric error
     '''
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+        super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                       constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index e2572d3..5086fdc 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -56,7 +56,7 @@ import numpy as np
 from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-import device
+import device as pydevice
 
 
 class Tensor(object):
@@ -81,7 +81,8 @@ class Tensor(object):
             assert isinstance(shape, tuple), 'shape should be tuple'
             vs = list(shape)
             if device is None:
-                self.singa_tensor = singa.Tensor(vs, dtype)
+                device = pydevice.get_default_device()
+                self.singa_tensor = singa.Tensor(vs, device, dtype)
             else:
                 self.singa_tensor = singa.Tensor(vs, device, dtype)
             self.shape = shape
@@ -225,12 +226,10 @@ class Tensor(object):
         '''
         return _call_singa_func(self.singa_tensor.T)
 
-    '''
     def copy(self):
-        shallow copy
-            call copy constructor of singa::Tensor
+        '''shallow copy calls copy constructor of singa::Tensor
+        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
-    '''
 
     def deepcopy(self):
         '''Same as clone().
@@ -513,7 +512,7 @@ def to_numpy(t):
     Returns:
         a numpy array
     '''
-    assert t.device == device.get_default_device() or t.device is None, \
+    assert (t.device.id() == -1) or (t.device is None), \
         'Please move the tensor onto the default host device'
 
     if t.dtype == core_pb2.kFloat32:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/swig/core_device.i
----------------------------------------------------------------------
diff --git a/src/python/swig/core_device.i b/src/python/swig/core_device.i
index 21b97b4..b3521be 100644
--- a/src/python/swig/core_device.i
+++ b/src/python/swig/core_device.i
@@ -49,9 +49,9 @@ class Device {
   int id() const;
 };
 
-#if USE_CUDA
 class Platform {
  public:
+#if USE_CUDA
   static int GetNumGPUs();
   static const std::vector<int> GetGPUIDs();
   static const std::pair<size_t, size_t> GetGPUMemSize(const int device);
@@ -61,9 +61,9 @@ class Platform {
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+#endif // USE_CUDA
   static std::shared_ptr<Device> GetDefaultDevice();
 };
 
-#endif // USE_CUDA
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index 7078240..441f352 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -25,6 +25,7 @@ class TestPythonLayer(unittest.TestCase):
                          )
 
     def setUp(self):
+        layer.engine='singacpp'
         self.w = {'init': 'Xavier', 'regularizer': 1e-4}
         self.b = {'init': 'Constant', 'value': 0}
         self.sample_shape = None
@@ -40,8 +41,8 @@ class TestPythonLayer(unittest.TestCase):
         in_sample_shape = (1, 3, 3)
         conv = layer.Conv2D('conv', 1, 3, 2, W_specs=self.w, b_specs=self.b,
                             pad=1, input_sample_shape=in_sample_shape)
-        cuda = device.create_cuda_gpu()
-        conv.to_device(cuda)
+        # cuda = device.create_cuda_gpu()
+        # conv.to_device(cuda)
         params = conv.param_values()
 
         raw_x = np.arange(9, dtype=np.float32) + 1
@@ -51,9 +52,9 @@ class TestPythonLayer(unittest.TestCase):
         params[0].copy_from_numpy(w)
         params[1].set_value(1.0)
 
-        x.to_device(cuda)
+        # x.to_device(cuda)
         y = conv.forward(model_pb2.kTrain, x)
-        y.to_host()
+        # y.to_host()
         npy = tensor.to_numpy(y).flatten()
 
         self.assertAlmostEqual(3.0, npy[0])
@@ -63,7 +64,7 @@ class TestPythonLayer(unittest.TestCase):
 
         dy = np.asarray([0.1, 0.2, 0.3, 0.4], dtype=np.float32).reshape(y.shape)
         grad = tensor.from_numpy(dy)
-        grad.to_device(cuda)
+        # grad.to_device(cuda)
         (dx, [dw, db]) = conv.backward(model_pb2.kTrain, grad)
         dx.to_host()
         dw.to_host()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/singa/test_cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cpp_cpu.cc b/test/singa/test_cpp_cpu.cc
index 35bd108..5f3308a 100644
--- a/test/singa/test_cpp_cpu.cc
+++ b/test/singa/test_cpp_cpu.cc
@@ -27,7 +27,7 @@ using singa::CppCPU;
 using singa::Block;
 TEST(CppCPU, Constructor) {
   CppCPU dev;
-  EXPECT_EQ(0, dev.id());
+  EXPECT_EQ(-1, dev.id());
 }
 
 TEST(CppCPU, MemoryMallocFree) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/singa/test_layer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_layer.cc b/test/singa/test_layer.cc
index aa01746..bb33dba 100644
--- a/test/singa/test_layer.cc
+++ b/test/singa/test_layer.cc
@@ -7,7 +7,7 @@ TEST(Layer, CreateLayer) {
       "convolution", "dense", "dropout", "relu", "batchnorm",
       "flatten",     "lrn",   "pooling", "prelu",      "softmax"};
   for (auto type : types) {
-    auto layer = singa::CreateLayer("singa_" + type);
+    auto layer = singa::CreateLayer("singacpp_" + type);
     // EXPECT_EQ(layer->layer_type(), type);
   }
 }

[31/51] [abbrv] incubator-singa git commit: SINGA-223 Use Sphinx to create the website.

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/zh/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/zh/_templates/layout.html b/doc/zh/_templates/layout.html
new file mode 100755
index 0000000..6b9f2c5
--- /dev/null
+++ b/doc/zh/_templates/layout.html
@@ -0,0 +1,61 @@
+{#
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+#}
+{% extends "!layout.html" %}
+
+{% block extrahead %}
+    <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
+{% endblock %}
+     
+{% block footer %}
+
+<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
+<a href="http://incubator.apache.org/">
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+</a>
+ 
+  <span class="rst-current-version" data-toggle="rst-current-version">
+    <span class="fa fa-book"> incubator-singa </span>
+    v: {{ version }}
+    <span class="fa fa-caret-down"></span>
+  </span>
+  <div class="rst-other-versions">
+    <dl>
+      <dt>Languages</dt>
+      <dd><a href="{{pathto(''+ '../index.html' , 1) }}">English</a></dd>
+      <dd><a href="">\u4e2d\u6587</a></dd>	  
+	  <!--dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
+	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
+	  <dd><a href="/it/latest/">Italiano</a></dd>
+	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd-->
+    </dl>
+    <dl>
+      <dt>Versions</dt>
+      <dd><a href="/{{ language }}/latest/">latest</a></dd>
+      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
+    </dl>
+  </div>
+</div>
+
+ <a href="https://github.com/apache/incubator-singa">
+    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
+        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
+        alt="Fork me on GitHub">
+</a>
+
+{{ super() }}
+{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/zh/conf.py
----------------------------------------------------------------------
diff --git a/doc/zh/conf.py b/doc/zh/conf.py
new file mode 100755
index 0000000..332a0d1
--- /dev/null
+++ b/doc/zh/conf.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+#
+# incubator-singa documentation build configuration file, created by
+# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python'))
+
+# -- General configuration ------------------------------------------------
+from recommonmark.parser import CommonMarkParser
+
+source_parsers = {
+    '.md': CommonMarkParser,
+}
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ['.rst', '.md']
+
+# The encoding of source files.
+#
+source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'incubator-singa'
+copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
+author = u'moaz'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'1.0.0'
+# The full version, including alpha/beta/rc tags.
+release = u'1.0.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#
+# today = ''
+#
+# Else, today_fmt is used as the format for a strftime call.
+#
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+# keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+# html_theme_path = []
+
+# The name for this set of Sphinx documents.
+# "<project> v<release> documentation" by default.
+#
+# html_title = u'Singa v1.0.0'
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#
+html_logo = 'image/singa.png'
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['../_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#
+# html_extra_path = []
+
+# If not None, a 'Last updated on:' timestamp is inserted at every page
+# bottom, using the given strftime format.
+# The empty string is equivalent to '%b %d, %Y'.
+#
+# html_last_updated_fmt = None
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+#
+# html_domain_indices = True
+
+# If false, no index is generated.
+#
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#
+html_show_sourcelink = False
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
+#
+# html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# 'ja' uses this config value.
+# 'zh' user can custom change `jieba` dictionary path.
+#
+# html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#
+# html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Singadoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+     # The paper size ('letterpaper' or 'a4paper').
+     #
+     # 'papersize': 'letterpaper',
+
+     # The font size ('10pt', '11pt' or '12pt').
+     #
+     # 'pointsize': '10pt',
+
+     # Additional stuff for the LaTeX preamble.
+     #
+     # 'preamble': '',
+
+     # Latex figure (float) alignment
+     #
+     # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
+     u'moaz', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+#
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#
+# latex_appendices = []
+
+# If false, no module index is generated.
+#
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     author, 'incubator-singa', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+#
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#
+# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/zh/index.md b/doc/zh/index.md
new file mode 100644
index 0000000..4b49d5f
--- /dev/null
+++ b/doc/zh/index.md
@@ -0,0 +1,9 @@
+SINGA \u4e2d\u6587\u6587\u6863
+==============
+
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index d6faf5d..3a9c0f0 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,3 +1,6 @@
+Examples
+--------
+
 .. toctree::
 
    char-rnn/README

[44/51] [abbrv] incubator-singa git commit: update results of rat check.

Posted by wa...@apache.org.

update results of rat check.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b3566e4c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b3566e4c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b3566e4c

Branch: refs/heads/master
Commit: b3566e4cbac4cdc606a751608b0fa0436e528c4e
Parents: a54c889
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Tue Aug 16 12:52:04 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 18 01:31:36 2016 +0800

----------------------------------------------------------------------
 rat_check | 1756 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1756 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b3566e4c/rat_check
----------------------------------------------------------------------
diff --git a/rat_check b/rat_check
new file mode 100644
index 0000000..1fcfd7d
--- /dev/null
+++ b/rat_check
@@ -0,0 +1,1756 @@
+
+*****************************************************
+Summary
+-------
+Generated at: 2016-08-16T12:50:50+08:00
+Notes: 4
+Binaries: 0
+Archives: 0
+Standards: 282
+
+Apache Licensed: 240
+Generated Documents: 0
+
+JavaDocs are generated and so license header is optional
+Generated files do not required license headers
+
+42 Unknown Licenses
+
+*******************************
+
+Unapproved licenses:
+
+  ./.gitmodules
+  ./.travis.yml
+  ./CMakeLists.txt
+  ./rat_check
+  ./cmake/Cuda.cmake
+  ./cmake/Dependencies.cmake
+  ./cmake/Protobuf.cmake
+  ./cmake/Utils.cmake
+  ./cmake/Templates/singa_config.h.in
+  ./cmake/Thirdparty/FindCBLAS.cmake
+  ./cmake/Thirdparty/FindCUDNN.cmake
+  ./cmake/Thirdparty/FindGlog.cmake
+  ./cmake/Thirdparty/FindLMDB.cmake
+  ./cmake/Thirdparty/FindOpenCL.cmake
+  ./examples/CMakeLists.txt
+  ./examples/index.rst
+  ./examples/cifar10/CMakeLists.txt
+  ./examples/cifar10/download_data.py
+  ./examples/cifar10/run-parallel.sh
+  ./examples/cifar10/run.sh
+  ./examples/imagenet/CMakeLists.txt
+  ./examples/imagenet/create_data.sh
+  ./examples/imagenet/run.sh
+  ./include/singa/utils/cuda_utils.h
+  ./include/singa/utils/timer.h
+  ./include/singa/utils/tinydir.h
+  ./src/CMakeLists.txt
+  ./src/core/tensor/distribution.cl
+  ./src/python/setup.py.in
+  ./src/python/singa/__init__.py
+  ./src/python/swig/numpy.i
+  ./test/CMakeLists.txt
+  ./test/gtest/CMakeLists.txt
+  ./test/gtest/gtest-all.cc
+  ./test/gtest/gtest.h
+  ./test/gtest/gtest_main.cc
+  ./test/python/test_layer.py
+  ./test/singa/test_layer.cc
+  ./test/singa/test_tensor.cc
+  ./test/singa/test_tensor_math.cc
+  ./test/singa/test_timer.cc
+  ./tool/cpplint.py
+
+*******************************
+
+Archives:
+
+*****************************************************
+  Files with Apache License headers will be marked AL
+  Binary files (which do not require AL headers) will be marked B
+  Compressed archives will be marked A
+  Notices, licenses etc will be marked N
+ !????? ./.gitmodules
+ !????? ./.travis.yml
+ !????? ./CMakeLists.txt
+  N     ./DISCLAIMER
+  N     ./LICENSE
+  N     ./NOTICE
+  N     ./RELEASE_NOTES
+  AL    ./jenkins.sh
+ !????? ./rat_check
+  AL    ./bin/singa-cleanup.sh
+  AL    ./bin/singa-console.sh
+  AL    ./bin/singa-env.sh
+  AL    ./bin/singa-run.sh
+  AL    ./bin/singa-stop.sh
+  AL    ./bin/zk-service.sh
+ !????? ./cmake/Cuda.cmake
+ !????? ./cmake/Dependencies.cmake
+ !????? ./cmake/Protobuf.cmake
+ !????? ./cmake/Utils.cmake
+ !????? ./cmake/Templates/singa_config.h.in
+ !????? ./cmake/Thirdparty/FindCBLAS.cmake
+ !????? ./cmake/Thirdparty/FindCUDNN.cmake
+ !????? ./cmake/Thirdparty/FindGlog.cmake
+ !????? ./cmake/Thirdparty/FindLMDB.cmake
+ !????? ./cmake/Thirdparty/FindOpenCL.cmake
+ !????? ./examples/CMakeLists.txt
+ !????? ./examples/index.rst
+  AL    ./examples/char-rnn/sample.py
+  AL    ./examples/char-rnn/train.py
+ !????? ./examples/cifar10/CMakeLists.txt
+  AL    ./examples/cifar10/alexnet-parallel.cc
+  AL    ./examples/cifar10/alexnet.cc
+  AL    ./examples/cifar10/alexnet.py
+  AL    ./examples/cifar10/cifar10.h
+ !????? ./examples/cifar10/download_data.py
+  AL    ./examples/cifar10/predict.py
+ !????? ./examples/cifar10/run-parallel.sh
+ !????? ./examples/cifar10/run.sh
+  AL    ./examples/cifar10/train.py
+  AL    ./examples/cifar10/vgg-parallel.cc
+  AL    ./examples/cifar10/vgg.py
+ !????? ./examples/imagenet/CMakeLists.txt
+  AL    ./examples/imagenet/alexnet.cc
+ !????? ./examples/imagenet/create_data.sh
+  AL    ./examples/imagenet/ilsvrc12.cc
+  AL    ./examples/imagenet/ilsvrc12.h
+ !????? ./examples/imagenet/run.sh
+  AL    ./examples/mnist/train.py
+  AL    ./include/singa/core/common.h
+  AL    ./include/singa/core/device.h
+  AL    ./include/singa/core/memory.h
+  AL    ./include/singa/core/scheduler.h
+  AL    ./include/singa/core/tensor.h
+  AL    ./include/singa/io/decoder.h
+  AL    ./include/singa/io/encoder.h
+  AL    ./include/singa/io/integer.h
+  AL    ./include/singa/io/network.h
+  AL    ./include/singa/io/reader.h
+  AL    ./include/singa/io/snapshot.h
+  AL    ./include/singa/io/transformer.h
+  AL    ./include/singa/io/writer.h
+  AL    ./include/singa/model/feed_forward_net.h
+  AL    ./include/singa/model/initializer.h
+  AL    ./include/singa/model/layer.h
+  AL    ./include/singa/model/loss.h
+  AL    ./include/singa/model/metric.h
+  AL    ./include/singa/model/optimizer.h
+  AL    ./include/singa/model/updater.h
+  AL    ./include/singa/utils/channel.h
+ !????? ./include/singa/utils/cuda_utils.h
+  AL    ./include/singa/utils/factory.h
+  AL    ./include/singa/utils/integer.h
+  AL    ./include/singa/utils/logging.h
+  AL    ./include/singa/utils/opencl_utils.h
+  AL    ./include/singa/utils/safe_queue.h
+  AL    ./include/singa/utils/singleton.h
+  AL    ./include/singa/utils/string.h
+ !????? ./include/singa/utils/timer.h
+ !????? ./include/singa/utils/tinydir.h
+ !????? ./src/CMakeLists.txt
+  AL    ./src/core/device/cpp_cpu.cc
+  AL    ./src/core/device/cuda_gpu.cc
+  AL    ./src/core/device/device.cc
+  AL    ./src/core/device/opencl_device.cc
+  AL    ./src/core/device/platform.cc
+  AL    ./src/core/memory/memory.cc
+  AL    ./src/core/scheduler/scheduler.cc
+ !????? ./src/core/tensor/distribution.cl
+  AL    ./src/core/tensor/math_kernel.cu
+  AL    ./src/core/tensor/math_kernel.h
+  AL    ./src/core/tensor/sparse_tensor.cc
+  AL    ./src/core/tensor/tensor.cc
+  AL    ./src/core/tensor/tensor_math.h
+  AL    ./src/core/tensor/tensor_math_cpp.h
+  AL    ./src/core/tensor/tensor_math_cuda.h
+  AL    ./src/core/tensor/tensor_math_opencl.cl
+  AL    ./src/core/tensor/tensor_math_opencl.h
+  AL    ./src/io/binfile_reader.cc
+  AL    ./src/io/binfile_writer.cc
+  AL    ./src/io/csv_decoder.cc
+  AL    ./src/io/csv_encoder.cc
+  AL    ./src/io/image_transformer.cc
+  AL    ./src/io/jpg_decoder.cc
+  AL    ./src/io/jpg_encoder.cc
+  AL    ./src/io/lmdb_reader.cc
+  AL    ./src/io/lmdb_writer.cc
+  AL    ./src/io/snapshot.cc
+  AL    ./src/io/textfile_reader.cc
+  AL    ./src/io/textfile_writer.cc
+  AL    ./src/io/network/endpoint.cc
+  AL    ./src/io/network/message.cc
+  AL    ./src/model/feed_forward_net.cc
+  AL    ./src/model/rnn.cc
+  AL    ./src/model/layer/activation.cc
+  AL    ./src/model/layer/activation.h
+  AL    ./src/model/layer/batchnorm.cc
+  AL    ./src/model/layer/batchnorm.h
+  AL    ./src/model/layer/convolution.cc
+  AL    ./src/model/layer/convolution.h
+  AL    ./src/model/layer/cudnn_activation.cc
+  AL    ./src/model/layer/cudnn_activation.h
+  AL    ./src/model/layer/cudnn_batchnorm.cc
+  AL    ./src/model/layer/cudnn_batchnorm.h
+  AL    ./src/model/layer/cudnn_convolution.cc
+  AL    ./src/model/layer/cudnn_convolution.h
+  AL    ./src/model/layer/cudnn_dropout.cc
+  AL    ./src/model/layer/cudnn_dropout.h
+  AL    ./src/model/layer/cudnn_lrn.cc
+  AL    ./src/model/layer/cudnn_lrn.h
+  AL    ./src/model/layer/cudnn_pooling.cc
+  AL    ./src/model/layer/cudnn_pooling.h
+  AL    ./src/model/layer/cudnn_rnn.cc
+  AL    ./src/model/layer/cudnn_rnn.h
+  AL    ./src/model/layer/cudnn_softmax.cc
+  AL    ./src/model/layer/cudnn_softmax.h
+  AL    ./src/model/layer/cudnn_utils.h
+  AL    ./src/model/layer/dense.cc
+  AL    ./src/model/layer/dense.h
+  AL    ./src/model/layer/dropout.cc
+  AL    ./src/model/layer/dropout.h
+  AL    ./src/model/layer/flatten.cc
+  AL    ./src/model/layer/flatten.h
+  AL    ./src/model/layer/lrn.cc
+  AL    ./src/model/layer/lrn.h
+  AL    ./src/model/layer/pooling.cc
+  AL    ./src/model/layer/pooling.h
+  AL    ./src/model/layer/prelu.cc
+  AL    ./src/model/layer/prelu.h
+  AL    ./src/model/layer/rnn.cc
+  AL    ./src/model/layer/rnn.h
+  AL    ./src/model/layer/softmax.cc
+  AL    ./src/model/layer/softmax.h
+  AL    ./src/model/loss/mse.cc
+  AL    ./src/model/loss/softmax_cross_entropy.cc
+  AL    ./src/model/metric/accuracy.cc
+  AL    ./src/model/optimizer/adagrad.cc
+  AL    ./src/model/optimizer/local_all_reduce.cc
+  AL    ./src/model/optimizer/nesterov.cc
+  AL    ./src/model/optimizer/optimizer.cc
+  AL    ./src/model/optimizer/rmsprop.cc
+  AL    ./src/model/optimizer/sgd.cc
+  AL    ./src/model/updater/local_updater.cc
+  AL    ./src/model/updater/updater.cc
+  AL    ./src/proto/core.proto
+  AL    ./src/proto/io.proto
+  AL    ./src/proto/model.proto
+ !????? ./src/python/setup.py.in
+ !????? ./src/python/singa/__init__.py
+  AL    ./src/python/singa/command.py
+  AL    ./src/python/singa/device.py
+  AL    ./src/python/singa/initializer.py
+  AL    ./src/python/singa/layer.py
+  AL    ./src/python/singa/loss.py
+  AL    ./src/python/singa/metric.py
+  AL    ./src/python/singa/model.py
+  AL    ./src/python/singa/net.py
+  AL    ./src/python/singa/optimizer.py
+  AL    ./src/python/singa/tensor.py
+  AL    ./src/python/singa/utils.py
+  AL    ./src/python/swig/core_device.i
+  AL    ./src/python/swig/core_tensor.i
+  AL    ./src/python/swig/model_layer.i
+  AL    ./src/python/swig/model_loss.i
+  AL    ./src/python/swig/model_metric.i
+  AL    ./src/python/swig/model_optimizer.i
+ !????? ./src/python/swig/numpy.i
+  AL    ./src/python/swig/singa.i
+  AL    ./src/utils/channel.cc
+  AL    ./src/utils/logging.cc
+  AL    ./src/utils/opencl_utils.cc
+ !????? ./test/CMakeLists.txt
+ !????? ./test/gtest/CMakeLists.txt
+ !????? ./test/gtest/gtest-all.cc
+ !????? ./test/gtest/gtest.h
+ !????? ./test/gtest/gtest_main.cc
+ !????? ./test/python/test_layer.py
+  AL    ./test/python/test_optimizer.py
+  AL    ./test/python/test_tensor.py
+  AL    ./test/singa/test_accuracy.cc
+  AL    ./test/singa/test_activation.cc
+  AL    ./test/singa/test_adagrad.cc
+  AL    ./test/singa/test_batchnorm.cc
+  AL    ./test/singa/test_binfile_rw.cc
+  AL    ./test/singa/test_channel.cc
+  AL    ./test/singa/test_convolution.cc
+  AL    ./test/singa/test_cpp_cpu.cc
+  AL    ./test/singa/test_cross_entropy.cc
+  AL    ./test/singa/test_csv.cc
+  AL    ./test/singa/test_cudnn_activation.cc
+  AL    ./test/singa/test_cudnn_batchnorm.cc
+  AL    ./test/singa/test_cudnn_convolution.cc
+  AL    ./test/singa/test_cudnn_dropout.cc
+  AL    ./test/singa/test_cudnn_lrn.cc
+  AL    ./test/singa/test_cudnn_pooling.cc
+  AL    ./test/singa/test_cudnn_rnn.cc
+  AL    ./test/singa/test_cudnn_softmax.cc
+  AL    ./test/singa/test_dense.cc
+  AL    ./test/singa/test_dropout.cc
+  AL    ./test/singa/test_ep.cc
+  AL    ./test/singa/test_flatten.cc
+  AL    ./test/singa/test_image_transformer.cc
+  AL    ./test/singa/test_initializer.cc
+  AL    ./test/singa/test_jpg.cc
+ !????? ./test/singa/test_layer.cc
+  AL    ./test/singa/test_lmdb_rw.cc
+  AL    ./test/singa/test_logging.cc
+  AL    ./test/singa/test_lrn.cc
+  AL    ./test/singa/test_memory.cc
+  AL    ./test/singa/test_mse.cc
+  AL    ./test/singa/test_nesterov.cc
+  AL    ./test/singa/test_opencl.cc
+  AL    ./test/singa/test_platform.cc
+  AL    ./test/singa/test_pooling.cc
+  AL    ./test/singa/test_prelu.cc
+  AL    ./test/singa/test_rmsprop.cc
+  AL    ./test/singa/test_sgd.cc
+  AL    ./test/singa/test_snapshot.cc
+  AL    ./test/singa/test_softmax.cc
+ !????? ./test/singa/test_tensor.cc
+ !????? ./test/singa/test_tensor_math.cc
+  AL    ./test/singa/test_textfile_rw.cc
+ !????? ./test/singa/test_timer.cc
+  AL    ./thirdparty/install.sh
+ !????? ./tool/cpplint.py
+  AL    ./tool/graph.py
+  AL    ./tool/node.sh
+  AL    ./tool/docker/mesos/.bashrc
+  AL    ./tool/docker/mesos/Dockerfile
+  AL    ./tool/docker/mesos/core-site.xml
+  AL    ./tool/docker/mesos/hdfs-site.xml
+  AL    ./tool/docker/mesos/install.sh
+  AL    ./tool/docker/mesos/mapred-site.xml
+  AL    ./tool/docker/mesos/yarn-site.xml
+  AL    ./tool/docker/singa/.bashrc
+  AL    ./tool/docker/singa/Dockerfile
+  AL    ./tool/docker/singa/Dockerfile_gpu
+  AL    ./tool/mesos/scheduler.proto
+  AL    ./tool/mesos/singa_scheduler.cc
+  AL    ./tool/python/singa.py
+  AL    ./tool/python/examples/__init__.py
+  AL    ./tool/python/examples/cifar10_cnn.py
+  AL    ./tool/python/examples/cifar10_cnn_cudnn.py
+  AL    ./tool/python/examples/cifar10_cnn_parameter.py
+  AL    ./tool/python/examples/mnist_ae.py
+  AL    ./tool/python/examples/mnist_mlp.py
+  AL    ./tool/python/examples/mnist_mlp_parameter.py
+  AL    ./tool/python/examples/mnist_mlp_test.py
+  AL    ./tool/python/examples/mnist_rbm1.py
+  AL    ./tool/python/examples/mnist_rbm2.py
+  AL    ./tool/python/examples/mnist_rbm3.py
+  AL    ./tool/python/examples/mnist_rbm4.py
+  AL    ./tool/python/examples/train_cifar10.py
+  AL    ./tool/python/examples/train_mnist.py
+  AL    ./tool/python/examples/datasets/__init__.py
+  AL    ./tool/python/examples/datasets/cifar10.py
+  AL    ./tool/python/examples/datasets/mnist.py
+  AL    ./tool/python/singa/__init__.py
+  AL    ./tool/python/singa/driver.i
+  AL    ./tool/python/singa/generatepy.sh
+  AL    ./tool/python/singa/initializations.py
+  AL    ./tool/python/singa/layer.py
+  AL    ./tool/python/singa/model.py
+  AL    ./tool/python/singa/parameter.py
+  AL    ./tool/python/singa/utils/__init__.py
+  AL    ./tool/python/singa/utils/message.py
+  AL    ./tool/python/singa/utils/utility.py
+ 
+*****************************************************
+ Printing headers for files without AL header...
+ 
+ 
+=======================================================================
+==./.gitmodules
+=======================================================================
+[submodule "lib/cnmem"]
+	path = lib/cnmem
+	url = https://github.com/NVIDIA/cnmem.git
+
+=======================================================================
+==./.travis.yml
+=======================================================================
+sudo: required
+language: cpp
+compiler: gcc
+dist: trusty
+
+before_install:
+ - sudo apt-get -qq update
+ - sudo apt-get install -qq -y libopenblas-dev libgoogle-glog-dev libprotobuf-dev protobuf-compiler
+ - sudo apt-get install -qq -y opencl-headers ocl-icd-*
+ - wget https://github.com/KhronosGroup/OpenCL-CLHPP/releases/download/v2.0.9/cl2.hpp
+ - sudo mv cl2.hpp /usr/include/CL/
+#- sudo apt-get install -qq libgtest-dev
+
+before_script:
+ - mkdir build && cd build
+ - cmake .. -DUSE_CUDA=OFF -DUSE_CUDNN=OFF -DUSE_PYTHON=OFF -DBUILD_OPENCL_TESTS=OFF
+
+script:
+ - make
+ - ./bin/test_singa --gtest_output=xml:./../gtest.xml
+
+
+=======================================================================
+==./CMakeLists.txt
+=======================================================================
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
+
+PROJECT(singa)
+SET(PACKAGE_VERSION "1.0.0")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g -O2 ")
+
+LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
+#message(STATUS "module path: ${CMAKE_MODULE_PATH}")
+
+# Flags
+IF(UNIX OR APPLE)
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall")
+ENDIF()
+IF(CMAKE_BUILD_TYPE=Debug)
+  SET(NVCC_FLAG "${NVCC_FLAG} -g -G ")
+ENDIF()
+#message(STATUS "${CMAKE_CXX_FLAGS}")
+SET(SINGA_INCLUDE_DIR
+    "${CMAKE_SOURCE_DIR}/include;${CMAKE_SOURCE_DIR}/lib/cnmem/include;${PROJECT_BINARY_DIR}")
+INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
+
+OPTION(USE_CBLAS "Use CBlas libs" ON)
+OPTION(USE_CUDA "Use Cuda libs" ON)
+OPTION(USE_CUDNN "Use Cudnn libs" ON)
+OPTION(USE_OPENCV "Use opencv" OFF)
+OPTION(USE_LMDB "Use LMDB libs" OFF)
+OPTION(USE_PYTHON "Generate py wrappers" ON)
+OPTION(USE_OPENCL "Use OpenCL" OFF)
+OPTION(ENABLE_DIST "enable distributed training" OFF)
+#OPTION(BUILD_OPENCL_TESTS "Build OpenCL tests" OFF)
+
+INCLUDE("cmake/Dependencies.cmake")
+INCLUDE("cmake/Utils.cmake")
+ADD_DEFINITIONS(-DUSE_CMAKE)
+#message(STATUS "${SINGA_INCLUDE_DIR}")
+
+CONFIGURE_FILE (
+    "${PROJECT_SOURCE_DIR}/cmake/Templates/singa_config.h.in"
+    "${PROJECT_BINARY_DIR}/include/singa/singa_config.h")
+
+#set(SINGA_CONFIGURE_SRC "${PROJECT_BINARY_DIR}/singa_config.h")
+#LIST(APPEND SRCS ${SINGA_CONFIGURE_SRCS} ${PROJECT_BINARY_DIR}/singa_config.h)
+
+SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
+SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
+
+IF (USE_CUDA)
+    ADD_SUBDIRECTORY(lib/cnmem)
+    LIST(APPEND SINGA_LINKER_LIBS cnmem)
+ENDIF()
+
+=======================================================================
+==./rat_check
+=======================================================================
+
+=======================================================================
+==./cmake/Cuda.cmake
+=======================================================================
+
+FIND_PACKAGE(CUDA 5.5 QUIET)
+
+IF(NOT CUDA_FOUND)
+    return()
+ENDIF()
+
+SET(HAVE_CUDA TRUE)
+MESSAGE(STATUS "Found cuda_v${CUDA_VERSION}")
+#ADD_DEFINITIONS(-DUSE_CUDA)
+#message(STATUS "linking: ${CUDA_CUDART_LIBRARY} ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}")
+
+IF(USE_CUDNN)
+#include(cmake/Modules/Cudnn.cmake)
+    FIND_PACKAGE(CUDNN REQUIRED)
+    INCLUDE_DIRECTORIES(SYSTEM ${CUDNN_INCLUDE_DIR})
+    LIST(APPEND SINGA_LINKER_LIBS ${CUDNN_LIBRARIES})
+    #ADD_DEFINITIONS(-DUSE_CUDNN)
+    #ADD_DEFINITIONS(-DCUDNN_VERSION_MAJOR=${CUDNN_VERSION_MAJOR})
+ENDIF()
+
+INCLUDE_DIRECTORIES(SYSTEM ${CUDA_INCLUDE_DIRS})
+LIST(APPEND SINGA_LINKER_LIBS ${CUDA_CUDART_LIBRARY} ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
+#MESSAGE(STATUS "libs " ${SINGA_LINKER_LIBS})
+
+=======================================================================
+==./cmake/Dependencies.cmake
+=======================================================================
+SET(SINGA_LINKER_LIBS "")
+
+#INCLUDE("cmake/ProtoBuf.cmake")
+
+FIND_PACKAGE( Protobuf REQUIRED )
+INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR})
+MESSAGE(STATUS "proto libs " ${PROTOBUF_LIBRARIES})
+LIST(APPEND SINGA_LINKER_LIBS ${PROTOBUF_LIBRARIES})
+INCLUDE("cmake/Protobuf.cmake")
+
+#FIND_PACKAGE(Glog)
+#IF(GLOG_FOUND)
+#    MESSAGE(STATUS "GLOG FOUND at ${GLOG_INCLUDE_DIR}")
+#    ADD_DEFINITIONS("-DUSE_GLOG")
+#    LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
+#ENDIF()
+
+IF(USE_LMDB)
+    FIND_PACKAGE(LMDB REQUIRED)
+    INCLUDE_DIRECTORIES(SYSTEM ${LMDB_INCLUDE_DIR})
+    LIST(APPEND SINGA_LINKER_LIBS ${LMDB_LIBRARIES})
+    MESSAGE(STATUS "FOUND lmdb at ${LMDB_INCLUDE_DIR}")
+ENDIF()
+
+IF(USE_CUDA)
+    INCLUDE("cmake/Cuda.cmake")
+ELSE()
+    SET(USE_CUDNN FALSE)
+ENDIF()
+
+IF(USE_CBLAS)
+    FIND_PACKAGE(CBLAS REQUIRED)
+    INCLUDE_DIRECTORIES(SYSTEM ${CBLAS_INCLUDE_DIR})
+    LIST(APPEND SINGA_LINKER_LIBS ${CBLAS_LIBRARIES})
+    MESSAGE(STATUS "FOUND cblas at ${CBLAS_LIBRARIES}")
+ENDIF()
+
+IF(USE_OPENCL)
+    FIND_PACKAGE(OpenCL REQUIRED)
+    IF(NOT OPENCL_FOUND)
+        MESSAGE(SEND_ERROR "OpenCL was requested, but not found.")
+    ELSE()
+        INCLUDE_DIRECTORIES(SYSTEM ${OpenCL_INCPATH})
+        LIST(APPEND SINGA_LINKER_LIBS ${OPENCL_LIBRARIES})
+        MESSAGE(STATUS "Found OpenCL at ${OPENCL_INCLUDE_DIRS}")
+        IF(NOT OPENCL_HAS_CPP_BINDINGS)
+            MESSAGE(SEND_ERROR "OpenCL C++ bindings cl2.hpp was not found.")
+        ELSE()
+            MESSAGE(STATUS "Found OpenCL C++ bindings.")
+        ENDIF()
+
+=======================================================================
+==./cmake/Protobuf.cmake
+=======================================================================
+# copy from cmake source code
+function(PROTOBUF_GENERATE_PYTHON OUTPUT)
+    if(NOT ARGN)
+        message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called 
+        without any proto files")
+        return()
+    endif(NOT ARGN)
+
+    set(${OUTPUT})
+    foreach(FIL ${ARGN})
+        get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+        get_filename_component(FIL_WE ${FIL} NAME_WE)
+        get_filename_component(PATH ${FIL} PATH)
+
+        list(APPEND ${OUTPUT} "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py")
+
+        add_custom_command(
+            OUTPUT "${CMAKE_BINARY_DIR}/python/singa/proto/${FIL_WE}_pb2.py"
+            COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
+            ARGS --python_out ${CMAKE_BINARY_DIR}/python/singa/proto
+                 --proto_path ${PATH} ${ABS_FIL}
+            DEPENDS ${ABS_FIL}
+            COMMENT "Running Python protocol buffer compiler on ${FIL}" VERBATIM)
+    endforeach()
+    
+    set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
+    set(${OUTPUT} ${${OUTPUT}} PARENT_SCOPE)
+endfunction()
+
+=======================================================================
+==./cmake/Utils.cmake
+=======================================================================
+
+macro(swig_generate_cxx pylist_variable)
+    if(NOT EXISTS "${CMKAE_BINARY_DIR}/python")
+        execute_process(
+            COMMAND mkdir ${CMAKE_BINARY_DIR}/python
+            COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa
+            COMMAND mkdir ${CMAKE_BINARY_DIR}/python/singa/proto
+            ERROR_QUIET)
+    endif()
+    execute_process(
+        COMMAND swig -c++ -python -I${CMAKE_SOURCE_DIR}/include 
+        -outdir ${CMAKE_BINARY_DIR}/python/singa
+        ${ARGN})
+
+    set(${pylist_variable} "${CMAKE_SOURCE_DIR}/src/python/swig/singa_wrap.cxx")
+endmacro()
+
+function (create_symlinks)
+    # Do nothing if building in-source
+    if (${CMAKE_CURRENT_BINARY_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
+        return()
+    endif()
+
+    foreach (path_file ${ARGN})
+        get_filename_component(folder ${path_file} PATH)
+
+        # Create REAL folder
+        file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/${folder}")
+
+        # Delete symlink if it exists
+        file(REMOVE "${CMAKE_BINARY_DIR}/${path_file}")
+
+        # Get OS dependent path to use in `execute_process`
+        file(TO_NATIVE_PATH "${CMAKE_BINARY_DIR}/${path_file}" link)
+        file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${path_file}" target)
+
+        if (UNIX)
+            set(command ln -s ${target} ${link})
+        else()
+            set(command cmd.exe /c mklink ${link} ${target})
+        endif()
+
+        execute_process(COMMAND ${command} 
+                        RESULT_VARIABLE result
+                        ERROR_VARIABLE output)
+
+        if (NOT ${result} EQUAL 0)
+            message(FATAL_ERROR "Could not create symbolic link for: ${target} --> ${output}")
+        endif()
+
+
+=======================================================================
+==./cmake/Templates/singa_config.h.in
+=======================================================================
+// Source directory
+#define SOURCE_FOLDER "${PROJECT_SOURCE_DIR}"
+
+// Binaries director
+#define BINARY_FOLDER "${PROJECT_BINARY_DIR}"
+
+#cmakedefine CPU_ONLY
+
+#cmakedefine USE_CBLAS
+
+#cmakedefine USE_OPENCV
+// cuda
+#cmakedefine USE_CUDA
+
+#cmakedefine USE_CUDNN
+#cmakedefine CUDNN_VERSION_MAJOR @CUDNN_VERSION_MAJOR@
+#cmakedefine CUDNN_VERSION_MINOR @CUDNN_VERSION_MINOR@
+#cmakedefine CUDNN_VERSION_PATCH @CUDNN_VERSION_PATCH@
+#cmakedefine CUDNN_VERSION_SWIG @CUDNN_VERSION_SWIG@
+
+#cmakedefine USE_OPENCL
+
+#cmakedefine ENABLE_DIST
+
+// lmdb
+#cmakedefine USE_LMDB
+
+
+=======================================================================
+==./cmake/Thirdparty/FindCBLAS.cmake
+=======================================================================
+
+FIND_PATH(CBLAS_INCLUDE_DIR NAMES cblas.h PATHS "$ENV{CBLAS_DIR}/include")
+FIND_LIBRARY(CBLAS_LIBRARIES NAMES openblas PATHS "$ENV{CBLAS_DIR}/lib")
+
+INCLUDE(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CBLAS DEFAULT_MSG CBLAS_INCLUDE_DIR CBLAS_LIBRARIES)
+
+IF(CBLAS_FOUND)
+    #    MESSAGE(STATUS "Found cblas at ${CBLAS_INCLUDE_DIR}")
+    MARK_AS_ADVANCED(CBLAS_INCLUDE_DIR CBLAS_LIBRARIES)
+ENDIF()
+
+=======================================================================
+==./cmake/Thirdparty/FindCUDNN.cmake
+=======================================================================
+
+FIND_PATH(CUDNN_INCLUDE_DIR NAME "cudnn.h" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
+FIND_LIBRARY(CUDNN_LIBRARIES NAME "libcudnn.so" PATHS "$ENV{CMAKE_LIBRARY_PATH}")
+
+#message("cudnn include path:${CUDNN_INCLUDE_DIR}  lib path: ${CUDNN_LIBRARIES}")
+#message("env include path:$ENV{CUDNN_DIR} next: $ENV{CMAKE_INCLUDE_PATH}")
+INCLUDE(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARIES)
+
+IF(CUDNN_FOUND)
+    FILE(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+    STRING(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
+        CUDNN_VERSION_MAJOR "${CUDNN_VERSION_FILE_CONTENTS}")
+    STRING(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
+        CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
+    STRING(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
+        CUDNN_VERSION_MINOR "${CUDNN_VERSION_FILE_CONTENTS}")
+    STRING(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
+        CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
+    STRING(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
+        CUDNN_VERSION_PATCH "${CUDNN_VERSION_FILE_CONTENTS}")
+    STRING(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
+        CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
+
+    IF(NOT CUDNN_VERSION_MAJOR)
+        SET(CUDNN_VERSION "???")
+    ELSE()
+      MATH(EXPR CUDNN_VERSION_SWIG "${CUDNN_VERSION_MAJOR} * 1000 + ${CUDNN_VERSION_MINOR} * 100 + ${CUDNN_VERSION_PATCH}")
+    ENDIF()
+    MESSAGE(STATUS "Found Cudnn_v${CUDNN_VERSION_SWIG} at ${CUDNN_INCLUDE_DIR} ${CUDNN_LIBRARIES}")
+    MARK_AS_ADVANCED(CUDNN_INCLUDE_DIR CUDNN_LIBRARIES)
+
+ENDIF()
+
+=======================================================================
+==./cmake/Thirdparty/FindGlog.cmake
+=======================================================================
+
+FIND_PATH(GLOG_INCLUDE_DIR NAMES glog/logging.h PATHS "$ENV{GLOG_DIR}/include")
+FIND_LIBRARY(GLOG_LIBRARIES NAMES glog)
+
+INCLUDE(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(GLOG DEFAULT_MSG GLOG_INCLUDE_DIR GLOG_LIBRARIES)
+
+IF(GLOG_FOUND)
+    #    MESSAGE(STATUS "Found glog at ${GLOG_INCLUDE_DIR}")
+    MARK_AS_ADVANCED(GLOG_INCLUDE_DIR GLOG_LIBRARIES)
+ENDIF()
+
+=======================================================================
+==./cmake/Thirdparty/FindLMDB.cmake
+=======================================================================
+
+FIND_PATH(LMDB_INCLUDE_DIR NAMES lmdb.h PATHS "$ENV{LMDB_DIR}/include")
+FIND_LIBRARY(LMDB_LIBRARIES NAMES lmdb PATHS "$ENV{LMDB_DIR}/include")
+
+INCLUDE(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(LMDB DEFAULT_MSG LMDB_INCLUDE_DIR LMDB_LIBRARIES)
+
+IF(LMDB_FOUND)
+    MESSAGE(STATUS "Found lmdb at ${LMDB_INCLUDE_DIR}")
+    MARK_AS_ADVANCED(LMDB_INCLUDE_DIR LMDB_LIBRARIES)
+    
+ENDIF()
+
+=======================================================================
+==./cmake/Thirdparty/FindOpenCL.cmake
+=======================================================================
+# This script was taken from https://github.com/elhigu/cmake-findopencl
+# and modified to support finding OpenCL 2.x C++ bindings.
+
+# Find OpenCL
+#
+# To set manually the paths, define these environment variables:
+# OpenCL_INCPATH    - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include)
+# OpenCL_LIBPATH    - Library path (e.h. OpenCL_LIBPATH=/usr/lib64/nvidia)
+#
+# Once done this will define
+#  OPENCL_FOUND            - system has OpenCL
+#  OPENCL_INCLUDE_DIRS     - the OpenCL include directory
+#  OPENCL_LIBRARIES        - link these to use OpenCL
+#  OPENCL_HAS_CPP_BINDINGS - system has also cl2.hpp
+
+FIND_PACKAGE(PackageHandleStandardArgs)
+
+SET (OPENCL_VERSION_STRING "0.1.0")
+SET (OPENCL_VERSION_MAJOR 0)
+SET (OPENCL_VERSION_MINOR 1)
+SET (OPENCL_VERSION_PATCH 0)
+
+IF (APPLE)
+
+	# IF OpenCL_LIBPATH is given use it and don't use default path
+	IF (DEFINED ENV{OpenCL_LIBPATH})
+		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL PATHS ENV OpenCL_LIBPATH NO_DEFAULT_PATH)
+	ELSE ()
+		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
+	ENDIF ()
+
+	# IF OpenCL_INCPATH is given use it and find for CL/cl.h and OpenCL/cl.h do not try to find default paths
+	IF (DEFINED ENV{OpenCL_INCPATH})
+		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h OpenCL/cl.h PATHS ENV OpenCL_INCPATH NO_DEFAULT_PATH)
+		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl2.hpp OpenCL/cl2.hpp PATHS ${OPENCL_INCLUDE_DIRS} NO_DEFAULT_PATH)
+	ELSE ()
+		FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
+		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl2.hpp DOC "Include for OpenCL CPP bindings on OSX")
+	ENDIF ()
+
+ELSE (APPLE)
+
+	IF (WIN32)
+
+		# Find OpenCL includes and libraries from environment variables provided by vendor
+		SET(OPENCL_INCLUDE_SEARCH_PATHS)
+		SET(OPENCL_LIBRARY_SEARCH_PATHS)
+		SET(OPENCL_LIBRARY_64_SEARCH_PATHS)
+
+		# Nvidia
+
+=======================================================================
+==./examples/CMakeLists.txt
+=======================================================================
+ADD_SUBDIRECTORY(cifar10)
+ADD_SUBDIRECTORY(imagenet)
+
+=======================================================================
+==./examples/index.rst
+=======================================================================
+Examples
+========
+
+.. toctree::
+
+   cifar10/README
+   char-rnn/README
+   imagenet/README
+
+
+
+=======================================================================
+==./examples/cifar10/CMakeLists.txt
+=======================================================================
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
+
+IF(USE_CUDNN)
+ADD_EXECUTABLE(alexnet alexnet.cc)
+ADD_DEPENDENCIES(alexnet singa_core singa_model singa_utils)
+TARGET_LINK_LIBRARIES(alexnet singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+
+ADD_EXECUTABLE(alexnet-parallel alexnet-parallel.cc)
+ADD_DEPENDENCIES(alexnet-parallel singa_core singa_model singa_utils)
+TARGET_LINK_LIBRARIES(alexnet-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+SET_TARGET_PROPERTIES(alexnet-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
+
+ADD_EXECUTABLE(vgg-parallel vgg-parallel.cc)
+ADD_DEPENDENCIES(vgg-parallel singa_core singa_model singa_utils)
+TARGET_LINK_LIBRARIES(vgg-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+SET_TARGET_PROPERTIES(vgg-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
+ENDIF(USE_CUDNN)
+
+=======================================================================
+==./examples/cifar10/download_data.py
+=======================================================================
+#!/usr/bin/env python
+import urllib
+import tarfile
+import os
+import sys
+import argparse
+
+
+def extract_tarfile(filepath):
+    if os.path.exists(filepath):
+        print 'The tar file does exist. Extracting it now..'
+        with tarfile.open(filepath, 'r') as f:
+            f.extractall('.')
+        print 'Finished!'
+        sys.exit(0)
+
+
+def check_dir_exist(dirpath):
+    if os.path.exists(dirpath):
+        print 'Directory %s does exist. To redownload the files, '\
+            'remove the existing directory and %s.tar.gz' % (dirpath, dirpath)
+        return True
+    else:
+        return False
+
+
+def do_download(dirpath, gzfile, url):
+    if check_dir_exist(dirpath):
+        sys.exit(0)
+    print 'Downloading CIFAR10 from %s' % (url)
+    urllib.urlretrieve(url, gzfile)
+    extract_tarfile(gzfile)
+    print 'Finished!'
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Download Cifar10 datasets')
+    parser.add_argument(
+        'file',
+        type=str,
+        choices=['py', 'bin'])
+    args = parser.parse_args()
+    if args.file == 'bin':
+        dirpath = 'cifar-10-batches-bin'
+        gzfile = 'cifar-10-binary' + '.tar.gz'
+        url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
+        do_download(dirpath, gzfile, url)
+    else:
+        dirpath = 'cifar-10-batches-py'
+        gzfile = 'cifar-10-python' + '.tar.gz'
+
+=======================================================================
+==./examples/cifar10/run-parallel.sh
+=======================================================================
+#!/usr/bin/env sh
+../../build/bin/alexnet-parallel -epoch 4
+#../../build/bin/vgg-parallel -epoch 4
+
+=======================================================================
+==./examples/cifar10/run.sh
+=======================================================================
+#!/usr/bin/env sh
+../../build/bin/alexnet -epoch 140
+
+=======================================================================
+==./examples/imagenet/CMakeLists.txt
+=======================================================================
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
+
+IF(USE_CUDNN)
+  IF(USE_OPENCV)
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp ")
+    ADD_EXECUTABLE(imagenet alexnet.cc)
+    ADD_DEPENDENCIES(imagenet singa_core singa_model singa_utils singa_io)
+    TARGET_LINK_LIBRARIES(imagenet singa_core singa_utils singa_model singa_io protobuf ${SINGA_LIBKER_LIBS})
+
+    ADD_EXECUTABLE(createdata ilsvrc12.cc)
+    ADD_DEPENDENCIES(createdata singa_core singa_io singa_model singa_utils)
+    TARGET_LINK_LIBRARIES(createdata singa_core singa_utils singa_io singa_model protobuf ${SINGA_LIBKER_LIBS})
+    #SET_TARGET_PROPERTIES(createdata PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
+  ENDIF(USE_OPENCV)
+ENDIF(USE_CUDNN)
+
+=======================================================================
+==./examples/imagenet/create_data.sh
+=======================================================================
+#!/usr/bin/env sh
+../../build/bin/createdata -trainlist "imagenet/label/train.txt" -trainfolder "imagenet/ILSVRC2012_img_train" \
+  -testlist "imagenet/label/val.txt" -testfolder "imagenet/ILSVRC2012_img_val" -outdata "imagenet_data" -filesize 1280
+
+=======================================================================
+==./examples/imagenet/run.sh
+=======================================================================
+#!/usr/bin/env sh
+../../build/bin/imagenet -epoch 90 -lr 0.01 -batchsize 256 -filesize 1280 -ntrain 1281167 -ntest 50000 \
+  -data "imagenet_data" -pfreq 100 -nthreads 12
+
+=======================================================================
+==./include/singa/utils/cuda_utils.h
+=======================================================================
+// from caffe include/caffe/util/device_alternative.hpp
+#ifndef SINGA_UTILS_CUDA_UTILS_H_
+#define SINGA_UTILS_CUDA_UTILS_H_
+
+#include "singa/singa_config.h"
+#ifdef USE_CUDA
+#include <cublas_v2.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <curand.h>
+
+inline const char* cublasGetErrorString(cublasStatus_t error) {
+  switch (error) {
+  case CUBLAS_STATUS_SUCCESS:
+    return "CUBLAS_STATUS_SUCCESS";
+  case CUBLAS_STATUS_NOT_INITIALIZED:
+    return "CUBLAS_STATUS_NOT_INITIALIZED";
+  case CUBLAS_STATUS_ALLOC_FAILED:
+    return "CUBLAS_STATUS_ALLOC_FAILED";
+  case CUBLAS_STATUS_INVALID_VALUE:
+    return "CUBLAS_STATUS_INVALID_VALUE";
+  case CUBLAS_STATUS_ARCH_MISMATCH:
+    return "CUBLAS_STATUS_ARCH_MISMATCH";
+  case CUBLAS_STATUS_MAPPING_ERROR:
+    return "CUBLAS_STATUS_MAPPING_ERROR";
+  case CUBLAS_STATUS_EXECUTION_FAILED:
+    return "CUBLAS_STATUS_EXECUTION_FAILED";
+  case CUBLAS_STATUS_INTERNAL_ERROR:
+    return "CUBLAS_STATUS_INTERNAL_ERROR";
+#if CUDA_VERSION >= 6000
+  case CUBLAS_STATUS_NOT_SUPPORTED:
+    return "CUBLAS_STATUS_NOT_SUPPORTED";
+#endif
+#if CUDA_VERSION >= 6050
+  case CUBLAS_STATUS_LICENSE_ERROR:
+    return "CUBLAS_STATUS_LICENSE_ERROR";
+#endif
+  }
+  return "Unknown cublas status";
+}
+
+inline const char* curandGetErrorString(curandStatus_t error) {
+  switch (error) {
+  case CURAND_STATUS_SUCCESS:
+    return "CURAND_STATUS_SUCCESS";
+  case CURAND_STATUS_VERSION_MISMATCH:
+    return "CURAND_STATUS_VERSION_MISMATCH";
+  case CURAND_STATUS_NOT_INITIALIZED:
+    return "CURAND_STATUS_NOT_INITIALIZED";
+  case CURAND_STATUS_ALLOCATION_FAILED:
+
+=======================================================================
+==./include/singa/utils/timer.h
+=======================================================================
+#ifndef SINGA_UTILS_TIMER_H
+#define SINGA_UTILS_TIMER_H
+
+#include <chrono>
+
+namespace singa {
+
+/// For benchmarking the time cost of operations.
+class Timer {
+ public:
+  typedef std::chrono::duration<int> Seconds;
+  typedef std::chrono::duration<int, std::milli> Milliseconds;
+  typedef std::chrono::duration<int, std::ratio<60 * 60>> Hours;
+  typedef std::chrono::duration<int, std::micro> Microseconds;
+
+  /// Init the internal time point to the current time
+  Timer() { Tick(); }
+  /// Reset the internal time point to the current time
+  void Tick() { last_ = std::chrono::high_resolution_clock::now(); }
+  /// Return the duration since last call to Tick() or since the creation of
+  /// Timer. The template arg must be from Second or Millisecond or Hour.
+  /// The returned value is the count of the time metric.
+  template <typename T = Milliseconds>
+  int Elapsed() const {
+    static_assert(std::is_same<T, Seconds>::value ||
+                      std::is_same<T, Milliseconds>::value ||
+                      std::is_same<T, Hours>::value ||
+                      std::is_same<T, Microseconds>::value,
+                  "Template arg must be Seconds | Milliseconds | Hours | Microseconds");
+    auto now  = std::chrono::high_resolution_clock::now();
+    return std::chrono::duration_cast<T>(now - last_).count();
+  }
+  /// Return the string rep of current wall time
+  // std::string CurrentTime();
+
+ private:
+  std::chrono::high_resolution_clock::time_point last_;
+};
+}
+#endif
+
+=======================================================================
+==./include/singa/utils/tinydir.h
+=======================================================================
+/*
+Copyright (c) 2013-2014, Cong Xu, Baudouin Feildel
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef TINYDIR_H
+#define TINYDIR_H
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#ifdef _MSC_VER
+#pragma warning (disable : 4996)
+#endif
+#else
+#include <dirent.h>
+#include <libgen.h>
+#include <sys/stat.h>
+#endif
+
+
+/* types */
+
+#define _TINYDIR_PATH_MAX 4096
+#ifdef _WIN32
+/* extra chars for the "\\*" mask */
+#define _TINYDIR_PATH_EXTRA 2
+#else
+
+=======================================================================
+==./src/CMakeLists.txt
+=======================================================================
+# generate protobuf sources 
+
+FILE(GLOB proto_files proto/*.proto) 
+protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files})
+IF (USE_PYTHON)
+    protobuf_generate_python(proto_pys ${proto_files})
+ENDIF()
+INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include")
+
+#message(STATUS "include: ${CMAKE_BINARY_DIR} ")
+#message(STATUS "srcs: ${proto_srcs}")
+#message(STATUS "hdrs: ${proto_hdrs}")
+#message(STATUS "pys: ${proto_pys}")
+ADD_LIBRARY(singa_proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_pys})
+FOREACH(fil ${proto_hdrs})
+    ADD_CUSTOM_COMMAND(
+        TARGET singa_proto PRE_BUILD
+        COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto"
+        COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto"
+        #COMMAND ${CMAKE_COMMAND} -E echo "copy done"
+        )
+ENDFOREACH()
+LIST(APPEND SINGA_LINKER_LIBS singa_proto)
+
+SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS})
+
+#FILE(GLOB_RECURSE utils_source ${CMAKE_CURRENT_SOURCE_DIR}/utils/ "*.cc")
+AUX_SOURCE_DIRECTORY(utils utils_source)
+#message(STATUS "UTILS ${utils_source}")
+ADD_LIBRARY(singa_utils SHARED ${utils_source})
+TARGET_LINK_LIBRARIES(singa_utils ${SINGA_LINKER_LIBS})
+LIST(APPEND SINGA_LINKER_LIBS singa_utils)
+
+#FILE(GLOB_RECURSE core_source ${CMAKE_CURRENT_SOURCE_DIR}/core/ "*.cc")
+AUX_SOURCE_DIRECTORY(core/device core_source)
+AUX_SOURCE_DIRECTORY(core/memory core_source)
+AUX_SOURCE_DIRECTORY(core/scheduler core_source)
+AUX_SOURCE_DIRECTORY(core/tensor core_source)
+IF (USE_CUDA)
+    FILE(GLOB_RECURSE cuda_source core "*.cu")
+    SET(FLAGS_BACKUP ${CMAKE_CXX_FLAGS})
+    SET(CMAKE_CXX_FLAGS "")
+    IF (CMAKE_BUILD_TYPE MATCHES DEBUG)
+        CUDA_COMPILE(cuda_objs SHARED ${cuda_source}
+            OPTIONS "-Xcompiler -fPIC -G -g")
+    ELSE (CMAKE_BUILD_TYPE MATCHES  DEBUG)
+        CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC")
+    ENDIF (CMAKE_BUILD_TYPE MATCHES DEBUG)
+    include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor")
+    SET(CMAKE_CXX_FLAGS ${FLAGS_BACKUP})
+
+=======================================================================
+==./src/core/tensor/distribution.cl
+=======================================================================
+// This code is adapted from https://github.com/amd/OpenCL-caffe/blob/stable/src/caffe/ocl/random.cl
+
+//Note: random generator has two parts
+//first part: the open sourced threefy random generator kernel from DE Shaw Research
+//second part. we wrap the kernel up to generate uniform, bernoulli and gaussion distribution generators.
+
+//begin: the open sourced random generator from DE Shaw Research
+//https://www.deshawresearch.com/resources_random123.html
+typedef uint uint32_t;
+
+struct r123array4x32 {
+  uint32_t v[4];
+};
+
+enum r123_enum_threefry32x4 {
+  R_32x4_0_0 = 10,
+  R_32x4_0_1 = 26,
+  R_32x4_1_0 = 11,
+  R_32x4_1_1 = 21,
+  R_32x4_2_0 = 13,
+  R_32x4_2_1 = 27,
+  R_32x4_3_0 = 23,
+  R_32x4_3_1 = 5,
+  R_32x4_4_0 = 6,
+  R_32x4_4_1 = 20,
+  R_32x4_5_0 = 17,
+  R_32x4_5_1 = 11,
+  R_32x4_6_0 = 25,
+  R_32x4_6_1 = 10,
+  R_32x4_7_0 = 18,
+  R_32x4_7_1 = 20
+};
+
+inline uint32_t RotL_32(uint32_t x, unsigned int N) {
+  return (x << (N & 31)) | (x >> ((32 - N) & 31));
+}
+
+typedef struct r123array4x32 threefry4x32_ctr_t;
+typedef struct r123array4x32 threefry4x32_key_t;
+typedef struct r123array4x32 threefry4x32_ukey_t;
+
+inline threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, threefry4x32_ctr_t in, threefry4x32_key_t k) {
+  threefry4x32_ctr_t X;
+  uint32_t ks[4 + 1];
+  int i;
+  ks[4] = 0x1BD11BDA;
+
+  {
+    ks[0] = k.v[0];
+    X.v[0] = in.v[0];
+
+=======================================================================
+==./src/python/setup.py.in
+=======================================================================
+# Always prefer setuptools over distutils
+from setuptools import setup
+
+
+setup(
+    name='singa',
+
+    version='${PACKAGE_VERSION}',
+
+    description='A General Deep Learning System',
+
+    url='https://github.com/apache/incubator-singa',
+
+    author='Apache SINGA (incubating)',
+    author_email='dev@singa.incubator.apache.org',
+
+    license='Apache 2',
+
+    classifiers=[
+        #   3 - Alpha
+        #   4 - Beta
+        #   5 - Production/Stable
+        'Development Status :: 3 - Alpha',
+
+        'Intended Audience :: Developers',
+        'Topic :: Deep Learning System ',
+
+        'License :: Apache License',
+
+        # Specify the Python versions you support here. In particular, ensure
+        # that you indicate whether you support Python 2, Python 3 or both.
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        ],
+
+    keywords='deep learning singa apache',
+
+    packages= ['singa', 'singa.proto'],
+
+    #py_modules=["singa"],
+
+    install_requires=[
+        'numpy>=1.11.0',
+        'protobuf>=2.5.0,<3'
+        ],
+
+    #List additional groups of dependencies here (e.g. development
+    #dependencies). You can install these using the following syntax,
+    #for example:
+
+=======================================================================
+==./src/python/singa/__init__.py
+=======================================================================
+
+=======================================================================
+==./src/python/swig/numpy.i
+=======================================================================
+/* -*- C -*-  (not really, but good for syntax highlighting) */
+
+/*
+ * Copyright (c) 2005-2015, NumPy Developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials provided
+ *        with the distribution.
+ *
+ *     * Neither the name of the NumPy Developers nor the names of any
+ *        contributors may be used to endorse or promote products derived
+ *        from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef SWIGPYTHON
+
+%{
+#ifndef SWIG_FILE_WITH_INIT
+#define NO_IMPORT_ARRAY
+#endif
+#include "stdio.h"
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/arrayobject.h>
+%}
+
+/**********************************************************************/
+
+%fragment("NumPy_Backward_Compatibility", "header")
+{
+
+=======================================================================
+==./test/CMakeLists.txt
+=======================================================================
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
+
+IF(ENABLE_DIST)
+  ADD_EXECUTABLE(test_ep "singa/test_ep.cc")
+  ADD_DEPENDENCIES(test_ep singa_io)
+  TARGET_LINK_LIBRARIES(test_ep singa_utils singa_io protobuf ${SINGA_LINKER_LIBS})
+ENDIF()
+
+ADD_LIBRARY(gtest STATIC EXCLUDE_FROM_ALL "gtest/gtest.h" "gtest/gtest-all.cc")
+
+AUX_SOURCE_DIRECTORY(singa singa_test_source)
+LIST(REMOVE_ITEM singa_test_source "singa/test_ep.cc")
+
+IF(NOT USE_OPENCL)
+    MESSAGE(STATUS "Skipping OpenCL tests")
+    LIST(REMOVE_ITEM singa_test_source "singa/test_opencl.cc")
+ENDIF()
+
+
+ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
+ADD_DEPENDENCIES(test_singa singa_core singa_utils)
+#MESSAGE(STATUS "link libs" ${singa_linker_libs})
+TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
+    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
+IF(UNIX AND (NOT APPLE))
+    LIST(APPEND LINK_FLAGS "-pthread")
+ENDIF()
+SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
+
+=======================================================================
+==./test/gtest/CMakeLists.txt
+=======================================================================
+
+=======================================================================
+==./test/gtest/gtest-all.cc
+=======================================================================
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// Google C++ Testing Framework (Google Test)
+//
+// Sometimes it's desirable to build Google Test by compiling a single file.
+// This file serves this purpose.
+
+// This line ensures that gtest.h can be compiled on its own, even
+// when it's fused.
+#include "gtest/gtest.h"
+
+// The following lines pull in the real gtest *.cc files.
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+
+=======================================================================
+==./test/gtest/gtest.h
+=======================================================================
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the public API for Google Test.  It should be
+// included by any test program that uses Google Test.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
+// program!
+//
+// Acknowledgment: Google Test borrowed the idea of automatic test
+// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
+// easyUnit framework.
+
+
+=======================================================================
+==./test/gtest/gtest_main.cc
+=======================================================================
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+
+GTEST_API_ int main(int argc, char **argv) {
+  printf("Running main() from gtest_main.cc\n");
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+=======================================================================
+==./test/python/test_layer.py
+=======================================================================
+import sys
+import os
+import unittest
+import numpy as np
+
+#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+
+from singa import layer
+from singa import device
+from singa import tensor
+from singa.proto import model_pb2
+
+
+def _tuple_to_string(t):
+    lt = [str(x) for x in t]
+    return '(' + ', '.join(lt) + ')'
+
+
+class TestPythonLayer(unittest.TestCase):
+
+    def check_shape(self, actual, expect):
+        self.assertEqual(actual, expect, 'shape mismatch, actual shape is %s'
+                         ' exepcted is %s' % (_tuple_to_string(actual),
+                                              _tuple_to_string(expect))
+                         )
+
+    def setUp(self):
+        layer.engine='singacpp'
+        self.w = {'init': 'Xavier', 'regularizer': 1e-4}
+        self.b = {'init': 'Constant', 'value': 0}
+        self.sample_shape = None
+
+    def test_conv2D_shape(self):
+        in_sample_shape = (3, 224, 224)
+        conv = layer.Conv2D('conv', 64, 3, 1, W_specs=self.w, b_specs=self.b,
+                            input_sample_shape=in_sample_shape)
+        out_sample_shape = conv.get_output_sample_shape()
+        self.check_shape(out_sample_shape, (64, 224, 224))
+
+    def test_conv2D_forward_backward(self):
+        in_sample_shape = (1, 3, 3)
+        conv = layer.Conv2D('conv', 1, 3, 2, W_specs=self.w, b_specs=self.b,
+                            pad=1, input_sample_shape=in_sample_shape)
+        # cuda = device.create_cuda_gpu()
+        # conv.to_device(cuda)
+        params = conv.param_values()
+
+        raw_x = np.arange(9, dtype=np.float32) + 1
+        x = tensor.from_numpy(raw_x)
+        x.reshape((1, 1, 3, 3))
+
+=======================================================================
+==./test/singa/test_layer.cc
+=======================================================================
+#include "gtest/gtest.h"
+#include "singa/model/layer.h"
+#include "singa/singa_config.h"
+
+TEST(Layer, CreateLayer) {
+  std::vector<std::string> types{
+      "convolution", "dense", "dropout", "relu", "batchnorm",
+      "flatten",     "lrn",   "pooling", "prelu",      "softmax"};
+  for (auto type : types) {
+    auto layer = singa::CreateLayer("singacpp_" + type);
+    // EXPECT_EQ(layer->layer_type(), type);
+  }
+}
+
+#ifdef USE_CUDNN
+TEST(Layer, CreateCudnnLayer) {
+  std::vector<std::string> types{
+      "convolution", "dropout", "relu", "batchnorm",
+      "lrn",   "pooling", "softmax"};
+#if CUDNN_VERSION_MAJOR >= 5
+  types.push_back("dropout");
+#endif
+  for (auto type : types) {
+    auto layer = singa::CreateLayer("cudnn_" + type);
+    // EXPECT_EQ(layer->layer_type(), type);
+  }
+}
+#endif
+
+=======================================================================
+==./test/singa/test_tensor.cc
+=======================================================================
+#include "gtest/gtest.h"
+#include "singa/core/tensor.h"
+using singa::Tensor;
+using singa::Shape;
+using singa::Device;
+
+TEST(TensorTest, TestConstructor) {
+  singa::Tensor float_t(singa::Shape{2,3});
+  EXPECT_EQ(6u, float_t.Size());
+  EXPECT_EQ(sizeof(float) * 6, float_t.MemSize());
+  EXPECT_EQ(singa::kFloat32, float_t.data_type());
+  auto s = float_t.shape();
+  EXPECT_EQ(s[0], 2u);
+  EXPECT_EQ(s[1], 3u);
+
+  EXPECT_NE(float_t.device(), nullptr);
+
+  singa::Tensor float16_t(Shape{2,3}, singa::kFloat16);
+  EXPECT_EQ(singa::kFloat16, float16_t.data_type());
+  EXPECT_EQ(6u, float16_t.Size());
+  EXPECT_EQ(12u, float16_t.block()->size());
+
+  singa::Tensor x(float16_t);
+  EXPECT_EQ(float16_t.Size(), x.Size());
+  EXPECT_EQ(float16_t.block(), x.block());
+  EXPECT_EQ(float16_t.data_type(), x.data_type());
+  EXPECT_EQ(float16_t.device(), x.device());
+
+  singa::Tensor y = float16_t;
+  EXPECT_EQ(float16_t.Size(), x.Size());
+  EXPECT_EQ(float16_t.block(), x.block());
+  EXPECT_EQ(float16_t.data_type(), x.data_type());
+  EXPECT_EQ(float16_t.device(), x.device());
+}
+
+TEST(TensorClass, Reshape) {
+  Tensor t;
+  t.Reshape(Shape{2,3});
+  EXPECT_TRUE((Shape{2,3} == t.shape()));
+
+  t.Reshape(Shape{3,3, 4});
+  EXPECT_TRUE((Shape{3,3, 4} == t.shape()));
+
+  t.Reshape(Shape{12});
+  EXPECT_TRUE((Shape{12} == t.shape()));
+
+  Tensor o;
+  EXPECT_TRUE(o.shape() != t.shape());
+  o.Reshape(Shape{3, 3});
+  EXPECT_TRUE(o.shape() != t.shape());
+
+=======================================================================
+==./test/singa/test_tensor_math.cc
+=======================================================================
+#include "gtest/gtest.h"
+#include "singa/core/tensor.h"
+using singa::Tensor;
+using singa::Shape;
+using singa::Device;
+
+class TestTensorMath : public ::testing::Test {
+ protected:
+  virtual void SetUp() {
+    a.Reshape(singa::Shape{6});
+    b.Reshape(singa::Shape{6});
+    c.Reshape(singa::Shape{6, 1});
+    d.Reshape(singa::Shape{3, 2});
+    e.Reshape(singa::Shape{3, 2});
+
+    a.CopyDataFromHostPtr<float>(dat1, 6);
+    b.CopyDataFromHostPtr<float>(dat2, 6);
+    e.CopyDataFromHostPtr<float>(dat1, 6);
+  }
+  Tensor a, b, c, d, e;
+  const float dat1[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  const float dat2[6] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f};
+};
+
+TEST_F(TestTensorMath, MemberAbs) {
+  Tensor aa = a.Clone();
+  Tensor bb = b.Clone();
+  Tensor cc = aa - bb;
+  const float *dptr = cc.data<float>();
+  EXPECT_NEAR(-0.1, dptr[0], 1e-5);
+  EXPECT_NEAR(-0.1, dptr[1], 1e-5);
+  EXPECT_NEAR(-0.1, dptr[2], 1e-5);
+
+  Tensor p = Abs(cc);
+  const float *dptr1 = p.data<float>();
+  EXPECT_NEAR(0.1, dptr1[0], 1e-5);
+  EXPECT_NEAR(0.1, dptr1[1], 1e-5);
+  EXPECT_NEAR(0.1, dptr1[2], 1e-5);
+}
+
+TEST_F(TestTensorMath, MemberExp) {
+  Tensor p = Exp(a);
+  const float *dptr1 = p.data<float>();
+  EXPECT_NEAR(exp(1.0f), dptr1[0], 1e-5);
+  EXPECT_NEAR(exp(2.0f), dptr1[1], 1e-5);
+  EXPECT_NEAR(exp(3.0f), dptr1[2], 1e-5);
+}
+
+TEST_F(TestTensorMath, MemberLog) {
+  Tensor p = Log(a);
+
+=======================================================================
+==./test/singa/test_timer.cc
+=======================================================================
+#include "gtest/gtest.h"
+#include "singa/utils/timer.h"
+
+#include <chrono>
+#include <thread>
+
+TEST(TimerTest, TestTick) {
+  singa::Timer t;
+  std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+  int time = t.Elapsed<singa::Timer::Milliseconds>();
+  EXPECT_GE(time, 1000);
+}
+
+=======================================================================
+==./tool/cpplint.py
+=======================================================================
+#!/usr/bin/env python
+#
+# Copyright (c) 2009 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#    * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#    * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Does google-lint on c++ files.
+
+The goal of this script is to identify places in the code that *may*
+be in non-compliance with google style.  It does not attempt to fix
+up these problems -- the point is to educate.  It does also not
+attempt to find all problems, or to ensure that everything it does
+find is legitimately a problem.
+
+In particular, we can get very confused by /* and // inside strings!
+We do a small hack, which is to ignore //'s with "'s after them on the
+same line, but it is far from perfect (in either direction).
+"""
+
+import codecs
+import copy
+import getopt
+import math  # for log
+import os
+import re
+import sre_compile

[43/51] [abbrv] incubator-singa git commit: SINGA-240 Add license for singa source

Posted by wa...@apache.org.

SINGA-240 Add license for singa source

Remove copyright in source headers.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9ee16e00
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9ee16e00
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9ee16e00

Branch: refs/heads/master
Commit: 9ee16e006a01a2d0edd19ddcc52183372ff49346
Parents: 1c5ca22
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Wed Aug 17 13:41:39 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 18 01:31:36 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                    | 11 +----------
 cmake/Cuda.cmake                  |  2 --
 cmake/Dependencies.cmake          |  2 --
 cmake/Templates/singa_config.h.in |  2 --
 cmake/Thirdparty/FindCBLAS.cmake  |  2 --
 cmake/Thirdparty/FindCUDNN.cmake  |  2 --
 cmake/Thirdparty/FindGlog.cmake   |  2 --
 cmake/Thirdparty/FindLMDB.cmake   |  2 --
 cmake/Utils.cmake                 |  2 --
 examples/CMakeLists.txt           |  2 --
 examples/cifar10/CMakeLists.txt   |  2 --
 examples/cifar10/download_data.py |  2 --
 examples/cifar10/run-parallel.sh  |  2 --
 examples/cifar10/run.sh           |  2 --
 examples/imagenet/CMakeLists.txt  |  2 --
 examples/imagenet/create_data.sh  |  2 --
 examples/imagenet/run.sh          |  2 --
 examples/index.rst                |  2 --
 include/singa/utils/timer.h       |  2 --
 jenkins.sh                        |  1 -
 src/CMakeLists.txt                |  2 --
 src/python/setup.py.in            |  2 --
 src/python/singa/__init__.py      |  2 --
 test/CMakeLists.txt               |  2 --
 test/gtest/CMakeLists.txt         |  2 --
 test/python/test_layer.py         |  2 --
 test/singa/test_layer.cc          |  2 --
 test/singa/test_tensor.cc         |  2 --
 test/singa/test_tensor_math.cc    |  2 --
 test/singa/test_timer.cc          |  2 --
 30 files changed, 1 insertion(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5646870..2bed134 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -40,7 +38,7 @@ SET(SINGA_INCLUDE_DIR
 INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
 
 OPTION(USE_CBLAS "Use CBlas libs" ON)
-OPTION(USE_CUDA "Use Cuda libs" ON)
+OPTION(USE_CUDA "Use Cuda libs" OFF)
 OPTION(USE_CUDNN "Use Cudnn libs" ON)
 OPTION(USE_OPENCV "Use opencv" OFF)
 OPTION(USE_LMDB "Use LMDB libs" OFF)
@@ -77,10 +75,3 @@ ENDIF()
 ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(test)
 ADD_SUBDIRECTORY(examples)
-
-INSTALL(DIRECTORY include/singa DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
-INSTALL(FILES ${CMAKE_BINARY_DIR}/include/singa/singa_config.h DESTINATION
-    ${CMAKE_INSTALL_PREFIX}/include/singa)
-INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/lib DESTINATION ${CMAKE_INSTALL_PREFIX}
-    PATTERN "*libgtest.a" EXCLUDE )
-#INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/bin DESTINATION ${CMAKE_INSTALL_PREFIX})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Cuda.cmake
----------------------------------------------------------------------
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
index 37d5878..e590bb1 100644
--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 62c7d52..b5fda6d 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Templates/singa_config.h.in
----------------------------------------------------------------------
diff --git a/cmake/Templates/singa_config.h.in b/cmake/Templates/singa_config.h.in
index 011489c..f3500d0 100644
--- a/cmake/Templates/singa_config.h.in
+++ b/cmake/Templates/singa_config.h.in
@@ -1,6 +1,4 @@
 /**
- * Copyright 2015 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Thirdparty/FindCBLAS.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindCBLAS.cmake b/cmake/Thirdparty/FindCBLAS.cmake
index 6e9ce5f..76c9118 100644
--- a/cmake/Thirdparty/FindCBLAS.cmake
+++ b/cmake/Thirdparty/FindCBLAS.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Thirdparty/FindCUDNN.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindCUDNN.cmake b/cmake/Thirdparty/FindCUDNN.cmake
index 7792f58..451b79b 100644
--- a/cmake/Thirdparty/FindCUDNN.cmake
+++ b/cmake/Thirdparty/FindCUDNN.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Thirdparty/FindGlog.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindGlog.cmake b/cmake/Thirdparty/FindGlog.cmake
index c0e857a..e18c602 100644
--- a/cmake/Thirdparty/FindGlog.cmake
+++ b/cmake/Thirdparty/FindGlog.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Thirdparty/FindLMDB.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindLMDB.cmake b/cmake/Thirdparty/FindLMDB.cmake
index fbe34fc..0553b19 100644
--- a/cmake/Thirdparty/FindLMDB.cmake
+++ b/cmake/Thirdparty/FindLMDB.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/cmake/Utils.cmake
----------------------------------------------------------------------
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 49494f0..a0373b8 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index bf27685..0bb6c2f 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/cifar10/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/cifar10/CMakeLists.txt b/examples/cifar10/CMakeLists.txt
index 6814958..313c0eb 100644
--- a/examples/cifar10/CMakeLists.txt
+++ b/examples/cifar10/CMakeLists.txt
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/cifar10/download_data.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/download_data.py b/examples/cifar10/download_data.py
index 8153030..7129b03 100755
--- a/examples/cifar10/download_data.py
+++ b/examples/cifar10/download_data.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/cifar10/run-parallel.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run-parallel.sh b/examples/cifar10/run-parallel.sh
index e4f0221..91b3b54 100755
--- a/examples/cifar10/run-parallel.sh
+++ b/examples/cifar10/run-parallel.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env sh
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/cifar10/run.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run.sh b/examples/cifar10/run.sh
index a544234..279edf0 100755
--- a/examples/cifar10/run.sh
+++ b/examples/cifar10/run.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env sh
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/imagenet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/imagenet/CMakeLists.txt b/examples/imagenet/CMakeLists.txt
index 9675f4c..465245a 100644
--- a/examples/imagenet/CMakeLists.txt
+++ b/examples/imagenet/CMakeLists.txt
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/imagenet/create_data.sh
----------------------------------------------------------------------
diff --git a/examples/imagenet/create_data.sh b/examples/imagenet/create_data.sh
index 6a9eea9..4c2c034 100755
--- a/examples/imagenet/create_data.sh
+++ b/examples/imagenet/create_data.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env sh
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/imagenet/run.sh
----------------------------------------------------------------------
diff --git a/examples/imagenet/run.sh b/examples/imagenet/run.sh
index aeffba3..6277d23 100755
--- a/examples/imagenet/run.sh
+++ b/examples/imagenet/run.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env sh
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index 8df22fd..b501b36 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,6 +1,4 @@
 .. 
-.. Copyright 2015 The Apache Software Foundation
-.. 
 .. Licensed to the Apache Software Foundation (ASF) under one
 .. or more contributor license agreements.  See the NOTICE file
 .. distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/include/singa/utils/timer.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/timer.h b/include/singa/utils/timer.h
index f54bce8..291c733 100644
--- a/include/singa/utils/timer.h
+++ b/include/singa/utils/timer.h
@@ -1,6 +1,4 @@
 /**
- * Copyright 2015 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/jenkins.sh
----------------------------------------------------------------------
diff --git a/jenkins.sh b/jenkins.sh
index ee87d40..347a55e 100644
--- a/jenkins.sh
+++ b/jenkins.sh
@@ -1,6 +1,5 @@
 #!/usr/bin/env sh
 #/**
-# * Copyright 2015 The Apache Software Foundation
 # *
 # * Licensed to the Apache Software Foundation (ASF) under one
 # * or more contributor license agreements.  See the NOTICE file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 425986f..174f05e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/src/python/setup.py.in
----------------------------------------------------------------------
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index 004222c..881cd30 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index 750eb60..c81c6ef 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7928bc1..7db784c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,6 +1,4 @@
 #
-# Copyright 2015 The Apache Software Foundation
-# 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/gtest/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index 750eb60..5b22dae 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -1,5 +1,3 @@
-#
-# Copyright 2015 The Apache Software Foundation
 # 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index f98a3c0..141cf56 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -1,5 +1,3 @@
-#
-# Copyright 2015 The Apache Software Foundation
 # 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/singa/test_layer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_layer.cc b/test/singa/test_layer.cc
index c913a17..7726a4a 100644
--- a/test/singa/test_layer.cc
+++ b/test/singa/test_layer.cc
@@ -1,6 +1,4 @@
 /**
- * Copyright 2015 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/singa/test_tensor.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc
index a6d6bab..316b996 100644
--- a/test/singa/test_tensor.cc
+++ b/test/singa/test_tensor.cc
@@ -1,6 +1,4 @@
 /**
- * Copyright 2015 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc
index 0c0b4f8..267905d 100644
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@ -1,6 +1,4 @@
 /**
- * Copyright 2015 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9ee16e00/test/singa/test_timer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_timer.cc b/test/singa/test_timer.cc
index 2b3bd05..76d3597 100644
--- a/test/singa/test_timer.cc
+++ b/test/singa/test_timer.cc
@@ -1,6 +1,4 @@
 /**
- * Copyright 2015 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

[07/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

1. copy the 'examples' folder into docs/ to generate htmls files using the README.md files
2. add software_stack.md to describe the major data structures of v1.0
3. add device.rst to introduce the Device APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/3299b0c2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/3299b0c2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/3299b0c2

Branch: refs/heads/master
Commit: 3299b0c29e62f61dc7f74918fb01a0fa2c93a4a4
Parents: 6b2ff3c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 11 23:13:12 2016 +0800
Committer: wangwei <wa...@gmail.com>
Committed: Fri Aug 12 00:04:20 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                      |   1 +
 doc/_static/images/singav1-sw.png | Bin 0 -> 24326 bytes
 doc/conf.py                       |   9 +--
 doc/docs.rst                      |   6 +-
 doc/docs/device.rst               |  47 ++++++++++++++++
 doc/docs/examples.rst             |   6 ++
 doc/docs/index.rst                |  15 ++---
 doc/docs/jp/index.md              |  23 --------
 doc/docs/kr/index.md              |  23 --------
 doc/docs/software_stack.md        |  99 +++++++++++++++++++++++++++++++++
 doc/docs/tensor.md                |   7 +++
 doc/docs/zh/index.md              |  10 ++--
 12 files changed, 177 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index 62a2236..c6eddf1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -50,6 +50,7 @@ clean:
 
 .PHONY: html
 html:
+	cp -rf ../examples docs/
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/_static/images/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singav1-sw.png b/doc/_static/images/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/_static/images/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..20ba51a 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,9 +16,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, '../src/python/singa/')
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,7 +36,7 @@ source_parsers = {
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-   
+'sphinx.ext.autodoc'
 ]
 
 # Add any paths that contain templates here, relative to this directory.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
index 2ebea60..400b12a 100644
--- a/doc/docs.rst
+++ b/doc/docs.rst
@@ -2,7 +2,5 @@ Documentation
 =============
 
 .. toctree::
-	docs/index
- 	docs/zh/index
-	docs/jp/index
-	docs/kr/index
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
new file mode 100644
index 0000000..aa5defb
--- /dev/null
+++ b/doc/docs/device.rst
@@ -0,0 +1,47 @@
+Device
+=======
+
+
+The Device abstract represent a hardware device with memory and compuation units.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implmentations,
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Create devices
+---------------
+
+Python API
+~~~~~~~~~~
+
+.. autofunction:: device.create_cuda_gpus
+
+.. autofunction:: device.create_cuda_gpus_on
+
+.. autofunction:: device.create_cuda_gpu_on
+
+.. autofunction:: device.get_default_device
+
+
+The following code shows how to create devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
+
+
+
+CPP API
+~~~~~~~
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
new file mode 100644
index 0000000..b0b2af8
--- /dev/null
+++ b/doc/docs/examples.rst
@@ -0,0 +1,6 @@
+Examples
+========
+
+.. toctree::
+
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index a6a1b49..8a74976 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -2,15 +2,8 @@ English
 =======
 
 .. toctree::
-	overview
-        installation
-	quick-start
-        programming-guide
-        distributed-training
-        data
-        checkpoint
-        python
-        test
-        gpu
-        examples
 
+   installation
+   software_stack
+   device
+   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/jp/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/jp/index.md b/doc/docs/jp/index.md
deleted file mode 100644
index 6679198..0000000
--- a/doc/docs/jp/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# \u6700\u65b0\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8
-
----
-
-* [\u30a4\u30f3\u30c8\u30ed\u30c0\u30af\u30b7\u30e7\u30f3](overview.html)
-* [\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb](installation.html)
-* [\u30af\u30a4\u30c3\u30af\u30b9\u30bf\u30fc\u30c8](quick-start.html)
-* [\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0 \u30ac\u30a4\u30c9](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [\u5206\u6563 \u30c8\u30ec\u30fc\u30cb\u30f3\u30b0](distributed-training.html)
-* [\u30c7\u30fc\u30bf\u306e\u6e96\u5099](data.html)
-* [Checkpoint \u3068 Resume](checkpoint.html)
-* [\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u30c6\u30b9\u30c8 \u3068 \u7279\u5fb4\u62bd\u51fa](test.html)
-* [\u30b5\u30f3\u30d7\u30eb](examples.html)
-    * Feed-forward \u30e2\u30c7\u30eb
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/kr/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/kr/index.md b/doc/docs/kr/index.md
deleted file mode 100644
index 990d5d9..0000000
--- a/doc/docs/kr/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# \ucd5c\uc2e0 \ubb38\uc11c
-
----
-
-* [\uac1c\uc694](overview.html)
-* [\uc778\uc2a4\ud1a8](installation.html)
-* [\ud035 \uc2a4\ud0c0\ud2b8](quick-start.html)
-* [\ud504\ub85c\uadf8\ub798\ubc0d \uac00\uc774\ub4dc](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [\ubd84\uc0b0 \ud2b8\ub808\uc774\ub2dd](distributed-training.html)
-* [\ub370\uc774\ud130 \uc900\ube44](data.html)
-* [Checkpoint \uc640 Resume](checkpoint.html)
-* [\uc131\ub2a5\ud14c\uc2a4\ud2b8 \ubc0f \ud2b9\uc9d5\ucd94\ucd9c](test.html)
-* [\uc0d8\ud50c](examples.html)
-    * Feed-forward \ubaa8\ub378
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; The
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, optimizers/initializer/metric/loss for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
+tensor operations on its execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA have implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents normal GPU card from both Nvidia and AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for tensor operations,
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users would create a device instance and pass it to create multiple
+tensor instances. When users call the Tensor functions, these function would invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
+including deep learning models and other traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware device
+using different programming languages. A new hardware device would be supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory could be implemented by Device, which
+manages both operation execution and memory malloc/free. More optimization details
+would be described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules.
+
+* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t the training loss functions.
+  They wraps the complex layer operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide variant methods of initializing
+  model parameters (stored in Tensor instances), following Uniform, Gaussian, etc.
+
+* [Loss](loss.html) and its subclasses defines the training objective loss functions.
+  Both functions of computing the loss values and computing the gradient of the prediction w.r.t the
+  objective loss are implemented. Example loss functions include squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide the function to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp etc.
+
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing.
+
+* Reader and its subclasses load string records from disk files
+* Writer and its subclasses write string records to disk files
+* Encoder and its subclasses encode Tensor instances into string records
+* Decoder and its subclasses decodes string records into Tensor instances
+* Endpoint represents a communication endpoint which provides functions for passing messages to each other.
+* Message represents communication messages between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
new file mode 100644
index 0000000..eaf8362
--- /dev/null
+++ b/doc/docs/tensor.md
@@ -0,0 +1,7 @@
+# Tensor
+
+
+##
+
+
+##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
index c44a2cf..4b49d5f 100644
--- a/doc/docs/zh/index.md
+++ b/doc/docs/zh/index.md
@@ -1,7 +1,9 @@
 SINGA \u4e2d\u6587\u6587\u6863
----
+==============
 
-* [\u7b80\u4ecb](overview.html)
-* [\u5b89\u88c5](installation_source.html)
-* [\u4f7f\u7528\u6307\u5357](programming-guide.html)
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide

[25/51] [abbrv] incubator-singa git commit: Merge commits from wenfeng for the installation page.

Posted by wa...@apache.org.

Merge commits from wenfeng for the installation page.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/2c049d68
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/2c049d68
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/2c049d68

Branch: refs/heads/master
Commit: 2c049d683f1bd4f590e87d9dd2f3fe0d2a1fb591
Parents: 30731ee 410f238
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 19:37:46 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 19:37:46 2016 +0800

----------------------------------------------------------------------
 doc/docs/installation.md     | 297 +++++++++++++++++++++++++++++---------
 src/python/setup.py.in       |   5 +-
 src/python/singa/__init__.py | 240 ------------------------------
 src/python/singa/command.py  | 240 ++++++++++++++++++++++++++++++
 4 files changed, 468 insertions(+), 314 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2c049d68/doc/docs/installation.md
----------------------------------------------------------------------
diff --cc doc/docs/installation.md
index 4cf4ea7,6bfdee3..5d3c8a2
--- a/doc/docs/installation.md
+++ b/doc/docs/installation.md
@@@ -1,71 -1,138 +1,226 @@@
--# Building SINGA from source
--
--## Dependencies
--
--### Required
- * Google Protobuf (>=2.5)
 -* Google Protobuf (>=2.5,<3)
--* BLAS (tested with OpenBLAS >=0.2.10)
--* CUDA (tested with 6.5, 7.0 and 7.5)
--* CUDNN (v4 and v5)
--* cmake (>=2.6)
--
--Users must install the above mandatory libraries.
--Currently CUDA and CUDNN are also mandatory, but it would become optional later.
--
--### Optional
--* Glog
--* OpenCV (tested with 2.4.8)
--* LMDB (tested with 0.9)
--
--
--## Instructions
--
--Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
--
--
--    $ git clone https://github.com/apache/incubator-singa.git
--    $ cd incubator-singa/
--    # switch to dev branch
--    $ git checkout dev
--
--
--If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
--which could be downloaded as
--
--    $ git submodule init
--    $ git submodule update
- 
--
- ### Linux & MacOS 
--
- GCC (>=4.8.1) is required to compile SINGA on Linux.
- You can use gcc compiler to do the installation on MacOS following the
- steps in Linux installation. 
- In addition, you can also install singa via clang compiler following the
- commands in this section.
 -### Linux OS
--
 -GCC (>=4.8.1) is required to compile SINGA on Linux OS.
--In SINGA_ROOT, execute the following commands for compiling SINGA,
--
--    $ mkdir build && cd build
 -    # generate Makefile for compilation
--    $ cmake ..
 -    # compile SINGA
--    $ make
--
- Note that if you are using CUDNN and it is not installed under system default
- folder, you need to let cmake know the paths to CUDNN,
 -Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
--
--    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
--    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
--
- You can use `ccmake ..` to configure the compilation options including using
- LMDB, GLOG, etc. In addition, you can set the proper search paths for the
- dependent libraries.
 -You can use `cmake ..` to configure the compilation options including using
 -LMDB, GLOG, etc.
--
--After compiling SINGA, you can run the unit tests by
--
--    $ ./bin/test_singa
--
--You can see all the testing cases with testing results. If SINGA passes all
--tests, then you have successfully installed SINGA. Please proceed to try the examples!
 -
 -
 -### MacOS
--
 -Currently only Linux OS is officially support.
--
--### Windows
 -
 -Currently only Linux OS is officially support.
 -
 -
 -# Install SINGA Python Module
 -
 -SINGA provide a python binding for python programers. Users can either install from source or 
 -from pre-built wheel file.
 -
 -## Install from source
 -
 -### Required
 -* python(==2.7)   
 -* pip(>=1.5)
 -* SWIG(>=3.0)   
 -* numpy(>=1.11.0)   
 -* Google protobuf(>=2.5,<3)   
 -
 -
 -### Configuration
 -To build SINGA python package, users should turn on python building switch in cmake config file: "CMakeList.txt"
 -
 -    OPTION(USE_PYTHON "Generate py wrappers" ON)
 -
 -### Instructions
 -Follow the instructions in the above sections to build SINGA from source,
 -
 -After that, execute the following commands:
 -
 -    # under the build directory
 -    $ cd python
 -    $ sudo pip install . 
 -
 -Then singa package should be installed in the corresponding python library. 
 -
 -## Pip Install from wheel 
 -
 -Install pip if it is not already installed:
 -
 -    $ sudo apt-get install python-pip python-dev
 -
 -Then, select the correct binary to install:
 -
 -    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
 -
 -    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
 -
 -    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
 -   
 -    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
 -   
 -Install SINGA:
 -
 -    $ sudo pip install --upgrade $SINGA_WHEEL_URL
 -
 -### build wheel file from source
 -
 -Users can build wheel file from source. After build SINGA, execute the following commands:
 -
 -    # under the build directory
 -    $ cd python
 -    $ python setup.py bdist_wheel
 -
 -Then users may get built wheel file under "dist" directory
++# Installation
++
++## Dependencies
++
++### Required
++* google protobuf (>=2.5,<3)
++* blas (tested with openblas >=0.2.10)
++* cmake (>=2.6)
++
++
++### Optional
++* glog
++* opencv (tested with 2.4.8)
++* lmdb (tested with 0.9)
++* cuda (tested with 6.5, 7.0 and 7.5)
++* cudnn (v4 and v5)
++
++PySINGA has additional dependencies
++
++* python(==2.7)
++* pip(>=1.5)
++* swig(>=3.0)
++* numpy(>=1.11.0)
++* openblas (>=0.2.10)
++
++Users are encouraged to install the cuda and [cudnn](https://developer.nvidia.com/cudnn) for running SINGA on GPUs to
++get better performance.
++Most of the dependent libraries could be installed via package mangers like
++apt-get or homebrew.
++
++    # for ubuntu users, tested on 14.04
++    sudo apt-get install libprotobuf-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev, python2.7-dev, python-pip, python-numpy
++
++    # for Mac OS users
++    brew install -vd glog lmdb
++    brew tap homebrew/science
++    brew install opencv
++    brew install openblas
++    brew tap homebrew/python
++    brew install python
++    brew install numpy  --with-openblas
++
++
++## Install PySINGA
++
++### From wheel
++
++After installing the dependencies for SINGA and PySINGA, please download the correct binary:
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++Then, run the following command
++
++    $ sudo pip install --upgrade $SINGA_WHEEL_URL
++
++If you do not have sudo right, you can run `pip install` in a python virtual environment.
++
++
++### From source
++
++Please compile SINGA from source (see the next section) with the 'USE_PYTHON' option on,
++and then run the following commands,
++
++    # under the build directory
++    $ cd python
++    $ sudo pip install .
++
++If you are using a virtual environment, you can ignore the `sudo` keyword.
++
++Developers can build the wheel file via
++
++    # under the build directory
++    $ cd python
++    $ python setup.py bdist_wheel
++
++
++The generated wheel file is under "dist" directory
++
++
++## Build SINGA from source
++
++Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
++
++    $ git clone https://github.com/apache/incubator-singa.git
++    $ cd incubator-singa/
++
++If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
++which could be downloaded as
++
++    $ git submodule init
++    $ git submodule update
++
++
++### Linux & MacOS
++
++GCC (>=4.8.1) is required to compile SINGA on Linux.
++For Mac OS users, you can use either GCC or Clang.
++
++In SINGA_ROOT, execute the following commands for compiling SINGA,
++
++    $ mkdir build && cd build
++    $ cmake ..
++    $ make
++    $ make install
++
++Note that if you are using CUDNN and it is not installed under system default
++folder, you need to let cmake know the paths to CUDNN,
++
++    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
++    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
++
++You can use `ccmake ..` to configure the compilation options including using
++generating python binding and changing the installation folder.
++If the dependent libraries are not in the system default paths, you need to export
++the following environment variables
++
++    export CMAKE_INCLUDE_PATH=<path to your header file folder>
++    export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++After compiling SINGA, you can run the unit tests by
++
++    $ ./bin/test_singa
++
++You can see all the testing cases with testing results. If SINGA passes all
++tests, then you have successfully installed SINGA. Please proceed to try the examples!
++
++
++### Windows
++To be added.
++
++
++## FAQ
++
++* Q: Error from running `cmake ..`, which cannot find the dependent libraries.
++
++    A: If you haven't installed the libraries, please install them. If you installed
++    the libraries in a folder that is outside of the system folder, e.g. /usr/local,
++    please export the following variables
++
++        export CMAKE_INCLUDE_PATH=<path to your header file folder>
++        export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from `make`, e.g. the linking phase
++
++    A: If your libraries are in other folders than system default paths, you need
++    to export the following varaibles
++
++    $ export LIBRARY_PATH=<path to your lib file folder>
++    $ export LD_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from header files, e.g. 'cblas.h no such file or directory exists'
++
++    A: You need to include the folder of the cblas.h into CPLUS_INCLUDE_PATH,
++    e.g.,
++
++        $ export CPLUS_INCLUDE_PATH=/opt/OpenBLAS/include:$CPLUS_INCLUDE_PATH
++
++* Q:While compiling SINGA, I get error `SSE2 instruction set not enabled`
++
++    A:You can try following command:
++
++        $ make CFLAGS='-msse2' CXXFLAGS='-msse2'
++
++* Q:I get `ImportError: cannot import name enum_type_wrapper` from google.protobuf.internal when I try to import .py files.
++
++    A: You need to install the python binding of protobuf, which could be installed via
++
++        $ sudo apt-get install protobuf
++
++    or from source
++
++        $ cd /PROTOBUF/SOURCE/FOLDER
++        $ cd python
++        $ python setup.py build
++        $ python setup.py install
++
++* Q: When I build OpenBLAS from source, I am told that I need a Fortran compiler.
++
++    A: You can compile OpenBLAS by
++
++        $ make ONLY_CBLAS=1
++
++    or install it using
++
++        $ sudo apt-get install libopenblas-dev
++
++* Q: When I build protocol buffer, it reports that GLIBC++_3.4.20 not found in /usr/lib64/libstdc++.so.6.
++
++    A9: This means the linker found libstdc++.so.6 but that library
++    belongs to an older version of GCC than was used to compile and link the
++    program. The program depends on code defined in
++    the newer libstdc++ that belongs to the newer version of GCC, so the linker
++    must be told how to find the newer libstdc++ shared library.
++    The simplest way to fix this is to find the correct libstdc++ and export it to
++    LD_LIBRARY_PATH. For example, if GLIBC++_3.4.20 is listed in the output of the
++    following command,
++
++        $ strings /usr/local/lib64/libstdc++.so.6|grep GLIBC++
++
++    then you just set your environment variable as
++
++        $ export LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH
++
++* Q: When I build glog, it reports that "src/logging_unittest.cc:83:20: error: \u2018gflags\u2019 is not a namespace-name"
++
++    A: It maybe that you have installed gflags with a different namespace such as "google". so glog can't find 'gflags' namespace.
++    Because it is not necessary to have gflags to build glog. So you can change the configure.ac file to ignore gflags.
++
++        1. cd to glog src directory
++        2. change line 125 of configure.ac  to "AC_CHECK_LIB(gflags, main, ac_cv_have_libgflags=0, ac_cv_have_libgflags=0)"
++        3. autoreconf
++
++    After this, you can build glog again.
++

[27/51] [abbrv] incubator-singa git commit: Merge commits for updating layer identifers and commits for updating the installation page.

Posted by wa...@apache.org.

Merge commits for updating layer identifers and commits for updating the installation page.

Conflicts:
	examples/cifar10/vgg.py
	src/python/singa/device.py
	src/python/singa/layer.py
	src/python/singa/optimizer.py
	src/python/singa/tensor.py
	test/CMakeLists.txt


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5db7eb61
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5db7eb61
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5db7eb61

Branch: refs/heads/master
Commit: 5db7eb61ed1e5e189aad9c4b02f85cae6fa23df9
Parents: 2c049d6 9c71bd6
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 20:04:54 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 20:04:54 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                       |   9 +-
 cmake/Dependencies.cmake             |   7 ++
 cmake/Thirdparty/FindCUDNN.cmake     |   2 +-
 examples/char-rnn/train.py           |   2 +-
 examples/cifar10/alexnet.cc          |   2 +-
 examples/cifar10/alexnet.py          |   2 +-
 examples/cifar10/train.py            |   2 +-
 examples/cifar10/vgg-parallel.cc     |   1 +
 examples/cifar10/vgg.py              |  18 ++--
 examples/imagenet/alexnet.cc         |  50 ++++++-----
 examples/mnist/README.md             |  18 ++++
 examples/mnist/train.py              | 134 ++++++++++++++++++++++++++++++
 include/singa/core/device.h          |  15 ++--
 include/singa/core/tensor.h          |  19 +++++
 include/singa/model/layer.h          |  13 ++-
 include/singa/model/loss.h           |   1 -
 src/CMakeLists.txt                   |  10 +--
 src/core/device/cpp_cpu.cc           |   2 +-
 src/core/tensor/math_kernel.cu       |  53 +++++++++++-
 src/core/tensor/math_kernel.h        |  14 +++-
 src/core/tensor/tensor.cc            |  15 +++-
 src/core/tensor/tensor_math.h        |  24 ++++++
 src/core/tensor/tensor_math_cpp.h    |  42 ++++++++++
 src/core/tensor/tensor_math_cuda.h   |  40 ++++++++-
 src/model/layer/activation.cc        |  10 +++
 src/model/layer/batchnorm.cc         |   3 +
 src/model/layer/convolution.cc       |   1 +
 src/model/layer/cudnn_batchnorm.cc   |  16 ++--
 src/model/layer/dense.cc             |   3 +
 src/model/layer/dropout.cc           |   3 +
 src/model/layer/flatten.cc           |   3 +
 src/model/layer/lrn.cc               |   3 +
 src/model/layer/pooling.cc           |   1 +
 src/model/layer/prelu.cc             |   3 +
 src/model/layer/rnn.cc               |   3 +
 src/model/layer/softmax.cc           |   3 +
 src/python/singa/device.py           |   5 +-
 src/python/singa/layer.py            |  28 ++++---
 src/python/singa/optimizer.py        |  63 ++++++++------
 src/python/singa/tensor.py           |  45 +++++++---
 src/python/swig/core_device.i        |   4 +-
 src/python/swig/core_tensor.i        |  10 +++
 test/CMakeLists.txt                  |   1 -
 test/python/test_layer.py            |  11 +--
 test/singa/test_cpp_cpu.cc           |   2 +-
 test/singa/test_cudnn_batchnorm.cc   |  22 ++---
 test/singa/test_image_transformer.cc |  20 ++---
 test/singa/test_layer.cc             |   2 +-
 48 files changed, 602 insertions(+), 158 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/src/python/singa/device.py
----------------------------------------------------------------------
diff --cc src/python/singa/device.py
index eff6783,897fdf5..65824c2
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@@ -113,7 -113,7 +113,10 @@@ def create_cuda_gpu_on(device_id)
      devices = create_cuda_gpus_on([device_id])
      return devices[0]
  
 +
+ default_device = singa.Platform.GetDefaultDevice()
++
++
  def get_default_device():
      '''Get the default host device which is a CppCPU device'''
-     return singa.Platform.GetDefaultDevice()
+     return default_device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --cc src/python/singa/optimizer.py
index 7c8cc39,86e68af..14cf3c0
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@@ -54,8 -54,8 +54,8 @@@ class Optimizer(object)
          lr (float): a constant for the learning rate, mutually exclusive with
              'lr_gen'.
          momentum (float): a constant for the momentum value
-         decay (float): the coefficent for L2 regularizer, which is mutually
 -        weight_decay (float): the coefficent for L2 regularizer, which is mutually
--            exclusive with 'regularizer'.
++        weight_decay (float): the coefficent for L2 regularizer, which is
++            mutually exclusive with 'regularizer'.
          lr_gen (function): a function returns the learning rate given
              the current training step/epoch. It is mutually exclusive with lr.
              If both are not set, the apply_with_lr function should be used for
@@@ -67,7 -67,7 +67,8 @@@
              constraint would be applied inside apply_with_lr(). Users can
              also do regularization outside.
      '''
-     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
++
+     def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
          if lr is not None:
              assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
@@@ -108,11 -108,14 +109,14 @@@
              name (str): parameter name
              specs (ParamSpec): protobuf obj, including regularizer and
                  constraint, multipliers for learning rate and weight decay.
+ 
          '''
 -	assert type(specs) == model_pb2.ParamSpec, \
 -		'specs should be model_pb2.ParamSpec instance'
++        assert isinstance(specs, model_pb2.ParamSpec), \
++            'specs should be model_pb2.ParamSpec instance'
          if specs.HasField('regularizer'):
-             self.regularizers[name] = CppRegularizer(specs.constraint)
+             self.regularizers[name] = CppRegularizer(specs.regularizer)
          if specs.HasField('constraint'):
-             self.constraints[name] = CppConstraint(specs.regularizer)
+             self.constraints[name] = CppConstraint(specs.constraint)
          if specs.lr_mult != 1:
              self.learning_rate_multiplier[name] = specs.lr_mult
          if specs.decay_mult != 1:
@@@ -190,12 -193,13 +194,20 @@@ class SGD(Optimizer)
      See the base Optimizer for all arguments.
      '''
  
-     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+     def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
-         super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
 -        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
--                                  constraint)
++        super(
++            SGD,
++            self).__init__(
++            lr,
++            momentum,
++            weight_decay,
++            lr_gen,
++            regularizer,
++         constraint)
          conf = model_pb2.OptimizerConf()
-         conf.momentum = self.momentum
+         if self.momentum is not None:
+             conf.momentum = self.momentum
          conf.type = 'sgd'
          self.opt = singa.CreateOptimizer('SGD')
          self.opt.Setup(conf.SerializeToString())
@@@ -212,12 -216,13 +224,13 @@@ class Nesterov(Optimizer)
      See the base Optimizer for all arguments.
      '''
  
-     def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+     def __init__(self, lr=None, momentum=0.9, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
-         super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
 -        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
--                                       constraint)
++        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen,
++                                       regularizer, constraint)
          conf = model_pb2.OptimizerConf()
-         conf.momentum = momentum
+         if self.momentum is not None:
+             conf.momentum = momentum
          conf.type = 'nesterov'
          self.opt = singa.CreateOptimizer('Nesterov')
          self.opt.Setup(conf.SerializeToString())
@@@ -236,9 -241,9 +249,10 @@@ class AdaGrad(Optimizer)
      Args:
          epsilon (float): small number for preventing numeric error.
      '''
-     def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
++
+     def __init__(self, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
-         super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+         super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                        constraint)
          conf = model_pb2.OptimizerConf()
          conf.delta = epsilon
@@@ -262,9 -267,9 +276,9 @@@ class RMSProp(Optimizer)
          epsilon (float): small value for preventing numeric error
      '''
  
-     def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
 -    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
--                 regularizer=None, constraint=None):
-         super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
++    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None,
++                 lr_gen=None, regularizer=None, constraint=None):
+         super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                        constraint)
          conf = model_pb2.OptimizerConf()
          conf.rho = rho
@@@ -333,6 -338,6 +347,7 @@@ class CppConstraint(Constraint)
      Args:
          conf (ConstraintConf): protobuf message for the configuration.
      '''
++
      def __init__(self, conf):
          self.constraint = singa.CreateConstraint(conf.type)
          self.constraint.Setup(conf.SerializeToString())

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --cc src/python/singa/tensor.py
index 2e60554,5086fdc..6641a71
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@@ -206,7 -207,7 +207,7 @@@ class Tensor(object)
          Args:
              t (Tensor): source Tensor.
          '''
--        assert type(t) == Tensor, 't must be a singa Tensor instance'
++        assert isinstance(t, Tensor), 't must be a singa Tensor instance'
          self.singa_tensor.CopyData(t.singa_tensor)
  
      def clone(self):
@@@ -404,16 -403,28 +403,38 @@@
                                      self.singa_tensor, rhs)
  
      def __lt__(self, rhs):
-         return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.LT_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(singa.LT_TT, self.singa_tensor,
++                                    rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs)
  
      def __le__(self, rhs):
-         return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.LE_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(
++                singa.LE_TT,
++                self.singa_tensor,
++                rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs)
  
      def __gt__(self, rhs):
-         return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.GT_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(
++                singa.GT_TT,
++                self.singa_tensor,
++                rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs)
  
      def __ge__(self, rhs):
-         return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.GE_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(
++                singa.GE_TT,
++                self.singa_tensor,
++                rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs)
  
  
  ''' python functions for global functions in Tensor.h

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/test/CMakeLists.txt
----------------------------------------------------------------------
diff --cc test/CMakeLists.txt
index 6e7dd84,6fc4d77..6c21034
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@@ -22,9 -22,5 +22,8 @@@ ADD_EXECUTABLE(test_singa "gtest/gtest_
  ADD_DEPENDENCIES(test_singa singa_core singa_utils)
  #MESSAGE(STATUS "link libs" ${singa_linker_libs})
  TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
 -    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
 -SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
 +    singa_io proto protobuf ${SINGA_LINKER_LIBS})
 +IF(UNIX AND (NOT APPLE))
 +    LIST(APPEND LINK_FLAGS "-pthread")
 +ENDIF()
 +SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
-

[05/51] [abbrv] incubator-singa git commit: SINGA-217 build python package with setup.py - Singa python binding, package with pip or create wheel - In the cmake file, specify USE_PYTHON ON, then cmake and make. After that run " pip install

Posted by wa...@apache.org.

SINGA-217 build python package with setup.py
  - Singa python binding, package with pip or create wheel
  - In the cmake file, specify USE_PYTHON ON, then cmake and make.
    After that run "<sudo> pip install ." under "build/python" path
    User can run command 'singa' in a model workspace to serve and train
  - dependent on numpy>=1.11.0; protobuf>=2.5.0,<3; flask>=0.10.1
    Users should also install openblas
  - Models are sharing on github gists.
  - Todo, optimize cmake compile method to avoid compile source file twice
    refine singa command


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bf81f252
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bf81f252
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bf81f252

Branch: refs/heads/master
Commit: bf81f252647f60d525ffc9ddfa7031183afbc6e5
Parents: 05720c2
Author: aaronwwf <dc...@gmail.com>
Authored: Wed Jul 20 17:23:19 2016 +0800
Committer: aaronwwf <dc...@gmail.com>
Committed: Thu Aug 11 16:55:43 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                |   5 +-
 cmake/Dependencies.cmake      |   2 +-
 include/singa/core/device.h   |   3 +
 src/CMakeLists.txt            |  39 +++---
 src/python/setup.py.in        |   7 +-
 src/python/singa/__init__.py  | 244 ++++++++++++++++++++++++++++++++++++-
 src/python/singa/net.py       |   2 +-
 src/python/swig/config.i.in   |   5 +
 src/python/swig/core_device.i |   2 +
 src/python/swig/model_layer.i |   6 +
 src/python/swig/singa.i       |   1 +
 test/python/test_layer.py     |   2 +-
 12 files changed, 285 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9efadc0..9c4f326 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,7 @@
 CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
 
 PROJECT(singa)
+SET(PACKAGE_VERSION "1.0.0")
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g -O2 ")
 
 LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
@@ -19,8 +20,8 @@ SET(SINGA_INCLUDE_DIR
 INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
 
 OPTION(USE_CBLAS "Use CBlas libs" ON)
-OPTION(USE_CUDA "Use Cuda libs" ON)
-OPTION(USE_CUDNN "Use Cudnn libs" ON)
+OPTION(USE_CUDA "Use Cuda libs" OFF)
+OPTION(USE_CUDNN "Use Cudnn libs" OFF)
 OPTION(USE_OPENCV "Use opencv" OFF)
 OPTION(USE_LMDB "Use LMDB libs" OFF)
 OPTION(USE_PYTHON "Generate py wrappers" ON)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 68d0bfc..ceef429 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -5,7 +5,7 @@ SET(SINGA_LINKER_LIBS "")
 FIND_PACKAGE( Protobuf REQUIRED )
 INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR})
 MESSAGE(STATUS "proto libs " ${PROTOBUF_LIBRARIES})
-LIST(APPEND singa_linker_libs ${PROTOBUF_LIBRARIES})
+LIST(APPEND SINGA_LINKER_LIBS ${PROTOBUF_LIBRARIES})
 INCLUDE("cmake/Protobuf.cmake")
 
 IF(USE_LMDB)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 4c46114..a564524 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -303,6 +303,7 @@ private:
 /// If CUDA or OPENCL are not enabled, then the respective related methods should
 /// return something that indicates their absence (for example, 0 devices);
 /// however they should always be available regardless of compile-time switches.
+#ifdef USE_CUDA
 class Platform {
 public:
 
@@ -372,6 +373,8 @@ private:
 #endif  // USE_OPENCL
 };
 
+#endif // USE_CUDA
+
 }  // namespace singa
 
 #endif  // SINGA_CORE_DEVICE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 06f177d..66d89dc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -6,6 +6,7 @@ IF (USE_PYTHON)
     protobuf_generate_python(proto_pys ${proto_files})
 ENDIF()
 INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include")
+
 #message(STATUS "include: ${CMAKE_BINARY_DIR} ")
 #message(STATUS "srcs: ${proto_srcs}")
 #message(STATUS "hdrs: ${proto_hdrs}")
@@ -21,6 +22,8 @@ FOREACH(fil ${proto_hdrs})
 ENDFOREACH()
 LIST(APPEND SINGA_LINKER_LIBS proto)
 
+SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS})
+
 #FILE(GLOB_RECURSE utils_source ${CMAKE_CURRENT_SOURCE_DIR}/utils/ "*.cc")
 AUX_SOURCE_DIRECTORY(utils utils_source)
 #message(STATUS "UTILS ${utils_source}")
@@ -28,7 +31,6 @@ ADD_LIBRARY(singa_utils SHARED ${utils_source})
 TARGET_LINK_LIBRARIES(singa_utils ${SINGA_LINKER_LIBS})
 LIST(APPEND SINGA_LINKER_LIBS singa_utils)
 
-
 #FILE(GLOB_RECURSE core_source ${CMAKE_CURRENT_SOURCE_DIR}/core/ "*.cc")
 AUX_SOURCE_DIRECTORY(core/device core_source)
 AUX_SOURCE_DIRECTORY(core/memory core_source)
@@ -74,18 +76,11 @@ ADD_LIBRARY(singa_io SHARED ${io_source})
 TARGET_LINK_LIBRARIES(singa_io ${SINGA_LINKER_LIBS})
 LIST(APPEND SINGA_LINKER_LIBS singa_io)
 
-#ADD_LIBRARY(singa_layer SHARED ${LAYER_SOURCE})
-#ADD_LIBRARY(singa_model SHARED ${MODEL_SOURCE})
-#ADD_LIBRARY(singa_utils SHARED ${UTILS_SOURCE})
-
-#TARGET_LINK_LIBRARIES(singa_core singa_utils)
-#TARGET_LINK_LIBRARIES(singa_layer singa_core singa_utils)
-#TARGET_LINK_LIBRARIES(singa_model singa_layer singa_core singa_utils)
-
-#ADD_LIBRARY(singa SHARED ${SINGA_LINKER_LIBS})
-
 IF(USE_PYTHON)
 
+    FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i")
+    CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i.in" "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i")
+
     FILE(GLOB python_files python/swig/singa.i)
     # delete old .cxx file
     FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/singa_wrap.cxx")
@@ -98,17 +93,13 @@ IF(USE_PYTHON)
     file(GLOB_RECURSE python_source_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.py)
 
     create_symlinks(${python_source_files})
-
-    FILE(GLOB_RECURSE layer_source_files ${CMAKE_SOURCE_DIR}/src/model/layer/*.cc)
-    # message(STATUS "layer_source_files ${layer_source_files}")
-    FILE(GLOB_RECURSE metric_source_files ${CMAKE_SOURCE_DIR}/src/model/metric/*.cc)
-    FILE(GLOB_RECURSE loss_source_files ${CMAKE_SOURCE_DIR}/src/model/loss/*.cc)
-    SET(python_cxxs "${CMAKE_SOURCE_DIR}/src/core/tensor/tensor.cc;${CMAKE_SOURCE_DIR}/src/core/device/device.cc;${layer_source_files};${metric_source_files};${loss_source_files}")
-    ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${python_cxxs})
-    SET(WRAPPER_LINKER_LIBS "${SINGA_LINKER_LIBS};protobuf")
+    
+    SET(python_cxxs "${core_source};${io_source};${model_source};${utils_source}")
+    ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${python_cxxs} ${cuda_objs})
+    SET(WRAPPER_LINKER_LIBS "${PREVIOUS_LINKER_LIBS}")
     TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS})
     TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS})
-    #message(STATUS "PYTHON_INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS}")
+    #message(STATUS "PREVIOUS_LINKER_LIBS ${PREVIOUS_LINKER_LIBS}")
 
     SET_TARGET_PROPERTIES(_singa_wrap
         PROPERTIES PREFIX ""
@@ -117,10 +108,10 @@ IF(USE_PYTHON)
 
     #SETUP
     SET(SETUP_PY_IN "python/setup.py.in")
-    set(SETUP_PY    "${CMAKE_BINARY_DIR}/python/setup.py")
-    configure_file(${SETUP_PY_IN} ${SETUP_PY})
+    SET(SETUP_PY    "${CMAKE_BINARY_DIR}/python/setup.py")
+    CONFIGURE_FILE(${SETUP_PY_IN} ${SETUP_PY})
 
-    #COPY protobuf files to python/singa/proto
-    FILE(COPY ${CMAKE_BINARY_DIR}/python/singa/__init__.py DESTINATION ${CMAKE_BINARY_DIR}/python/singa/proto )
+    #create python/singa/proto/__init__.py
+    FILE(WRITE ${CMAKE_BINARY_DIR}/python/singa/proto/__init__.py "")
 
 ENDIF(USE_PYTHON)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/setup.py.in
----------------------------------------------------------------------
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index b53e54b..d1ac3c9 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -40,7 +40,12 @@ setup(
 
     #py_modules=["singa"],
 
-    #install_requires=['peppercorn'],
+    install_requires=[
+        'numpy>=1.11.0',
+        'protobuf>=2.5.0,<3',
+        'flask>=0.10.1'
+        ],
+
     #List additional groups of dependencies here (e.g. development
     #dependencies). You can install these using the following syntax,
     #for example:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index d5e48bb..ec26906 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,3 +1,241 @@
-def main():
-    """Entry point for the application script"""
-    print("Welcome to SINGA!")
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+
+'''
+This script is the main entrance for user to run singa inside a model workspace
+
+To use this script, user sudo install these dependencies: flask pillow and protobuf 
+'''
+
+import sys, glob, os, random, shutil, time
+from flask import Flask, request, redirect, url_for
+from PIL import Image
+import numpy as np
+import ConfigParser 
+import urllib, traceback
+
+
+from argparse import ArgumentParser
+from argparse import RawDescriptionHelpFormatter
+sys.path.append(os.getcwd())
+
+__all__ = []
+__version__ = 0.1
+__date__ = '2016-07-20'
+__updated__ = '2016-07-20'
+__shortdesc__ = '''
+welcome to singa
+'''
+
+app = Flask(__name__)
+config = ConfigParser.RawConfigParser()    
+service = {}
+data_path = "data_"
+parameter_path = "parameter_"
+
+debug = False
+
+class CLIError(Exception):
+    '''Generic exception to raise and log different fatal errors.'''
+    def __init__(self, msg):
+        super(CLIError).__init__(type(self))
+        self.msg = "E: %s" % msg
+    def __str__(self):
+        return self.msg
+    def __unicode__(self):
+        return self.msg
+
+def main(argv=None): # IGNORE:C0111
+    '''Command line options.'''
+
+    from . import device
+
+    if argv is None:
+        argv = sys.argv
+    else:
+        sys.argv.extend(argv)
+
+    program_name = os.path.basename(sys.argv[0])
+    program_version = "v%s" % __version__
+    program_build_date = str(__updated__)
+    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
+    program_shortdesc = __shortdesc__
+    program_license = '''%s
+
+  Created by dbsystem group on %s.
+  Copyright 2016 NUS School of Computing. All rights reserved.
+
+  Licensed under the Apache License 2.0
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Distributed on an "AS IS" basis without warranties
+  or conditions of any kind, either express or implied.
+
+USAGE
+''' % (program_shortdesc, str(__date__))
+
+    global debug
+
+    try:
+        # Setup argument parser
+        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
+        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
+        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
+        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
+        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
+        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
+        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
+        parser.add_argument('-V', '--version', action='version', version=program_version_message)
+
+        # Process arguments
+        args = parser.parse_args()
+
+        port = args.port
+        parameter_file = args.parameter
+        mode = args.mode
+        need_reload = args.reload_data
+        use_cpu = args.use_cpu
+        debug = args.debug
+
+        #prepare data files
+        config.read('file.cfg')
+        file_prepare(need_reload)
+
+
+        import network as net
+        model = net.create()
+
+        #load parameter
+        parameter_file=get_parameter(parameter_file)
+
+        if parameter_file:
+            print "load parameter file: %s" % parameter_file
+            model.load(parameter_file)
+        
+        if use_cpu:
+            raise CLIError("Currently cpu is not support!")
+        else:
+            print "runing with gpu"
+            d = device.create_cuda_gpu()
+
+        model.to_device(d)
+
+        if mode == "serve":
+            print "runing singa in serve mode, listen to  port: %s " % port
+            global service
+            from serve import Service
+            service =Service(model,d)
+
+            app.debug = debug 
+            app.run(host='0.0.0.0', port= port)
+        elif mode == "train":
+            print "runing singa in train mode"
+            global trainer 
+            from train import Trainer 
+            trainer= Trainer(model,d)
+            if not parameter_file:
+                trainer.initialize()
+            trainer.train()
+        else:
+            raise CLIError("Currently only serve mode is surpported!")
+        return 0
+    except KeyboardInterrupt:
+        ### handle keyboard interrupt ###
+        return 0
+    except Exception, e:
+        if debug:
+            traceback.print_exc()
+            raise(e)
+        indent = len(program_name) * " "
+        sys.stderr.write(program_name + ": " + str(e) + "\n")
+        sys.stderr.write(indent + "  for help use --help \n\n")
+        return 2
+
+def file_prepare(reload_data=False):
+    '''
+        download all files and generate data.py
+    '''
+    if not reload_data and os.path.exists("data_.py"):
+        return
+
+    print "download file"
+    #clean data
+    shutil.rmtree("data_.py",ignore_errors=True)
+    shutil.rmtree("data_",ignore_errors=True)
+
+    data_py=open("data_.py",'w') 
+    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
+    if config.has_section("data"):
+        file_list = config.items("data")
+        #download files
+        for f in file_list:
+            name,path=download_file(f[0],f[1],data_path)
+            data_py.write("%s=\"%s\"\n" % (name,path)) 
+
+    data_py.flush()
+    data_py.close()
+    
+    if config.has_section("parameter"):
+        parameter_list = config.items("parameter")
+        for p in parameter_list:
+            download_file(p[0],p[1],parameter_path)
+
+def download_file(name,path,dest):
+    '''
+    download one file to dest
+    '''
+    if not os.path.exists(dest):
+        os.makedirs(dest)
+    if (path.startswith('http')):
+        file_name = path.split('/')[-1]
+        target = os.path.join(dest,file_name)
+        urllib.urlretrieve(path,target)
+    return name,target
+
+
+def get_parameter(file_name=None):
+    '''
+    get the paticular file name or get the last parameter file
+    '''
+    if not os.path.exists(parameter_path):
+        os.makedirs(parameter_path)
+        return 
+
+    if file_name:
+	return os.path.join(parameter_path,file_name)
+
+    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
+    if len(parameter_list)==0:
+        return
+    parameter_list.sort()
+
+    return parameter_list[-1]
+	
+@app.route("/")
+def index():
+    return "Hello SINGA User!"
+
+@app.route('/predict', methods=['POST'])
+def predict():
+    if request.method == 'POST':
+        try:
+            response=service.serve(request)
+        except Exception as e:
+            return e
+        return response
+    return "error, should be post request"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index 1617717..f040378 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -48,7 +48,7 @@ class FeedForwardNet(object):
         """
         if len(self.layers) > 0 and lyr.has_setup is False:
             shape = self.layers[-1].get_output_sample_shape()
-            print shape
+            #print shape
             lyr.setup(shape)
         self.layers.append(lyr)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/swig/config.i.in
----------------------------------------------------------------------
diff --git a/src/python/swig/config.i.in b/src/python/swig/config.i.in
new file mode 100644
index 0000000..ed386b2
--- /dev/null
+++ b/src/python/swig/config.i.in
@@ -0,0 +1,5 @@
+// Pass in cmake configurations to swig
+#cmakedefine01 USE_CUDA 
+#cmakedefine01 USE_CUDNN 
+#cmakedefine CUDNN_VERSION_MAJOR ${CUDNN_VERSION_MAJOR}
+#cmakedefine CUDNN_VERSION_PATCH ${CUDNN_VERSION_PATCH} 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/swig/core_device.i
----------------------------------------------------------------------
diff --git a/src/python/swig/core_device.i b/src/python/swig/core_device.i
index a5d0731..21b97b4 100644
--- a/src/python/swig/core_device.i
+++ b/src/python/swig/core_device.i
@@ -49,6 +49,7 @@ class Device {
   int id() const;
 };
 
+#if USE_CUDA
 class Platform {
  public:
   static int GetNumGPUs();
@@ -63,5 +64,6 @@ class Platform {
   static std::shared_ptr<Device> GetDefaultDevice();
 };
 
+#endif // USE_CUDA
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/swig/model_layer.i
----------------------------------------------------------------------
diff --git a/src/python/swig/model_layer.i b/src/python/swig/model_layer.i
index a6cdad1..f82b0f1 100644
--- a/src/python/swig/model_layer.i
+++ b/src/python/swig/model_layer.i
@@ -44,7 +44,9 @@ using singa::LayerConf;
 
 %shared_ptr(singa::Layer)
 %shared_ptr(singa::RNN)
+#if USE_CUDNN
 %shared_ptr(singa::CudnnRNN)
+#endif
 
 namespace std {
   %template(strVector) vector<string>;
@@ -81,6 +83,8 @@ const std::vector<std::string> GetRegisteredLayers();
 class RNN : public Layer {
 };
 
+#if USE_CUDA && USE_CUDNN
+#if CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
 class CudnnRNN : public RNN {
  public:
  // note: Must use std::vector instead of vector.
@@ -92,5 +96,7 @@ class CudnnRNN : public RNN {
     const std::vector<size_t> GetOutputSampleShape() const override;
 };
 
+#endif  // CUDNN_VERSION_MINOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#endif  // USE_CUDA && USE_CUDNN
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/src/python/swig/singa.i
----------------------------------------------------------------------
diff --git a/src/python/swig/singa.i b/src/python/swig/singa.i
index 3f12569..12f46f3 100644
--- a/src/python/swig/singa.i
+++ b/src/python/swig/singa.i
@@ -22,6 +22,7 @@
 /*interface file for swig */
 
 %module singa_wrap
+%include "config.i"
 %include "core_tensor.i"
 %include "core_device.i"
 %include "model_layer.i"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf81f252/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index 7e1059e..7078240 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -3,7 +3,7 @@ import os
 import unittest
 import numpy as np
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import layer
 from singa import device

[40/51] [abbrv] incubator-singa git commit: Merge PR #245 for fixing the error in test_snapshort.cc due to data type problems.

Posted by wa...@apache.org.

Merge PR #245 for fixing the error in test_snapshort.cc due to data type problems.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/cdc5ffd7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/cdc5ffd7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/cdc5ffd7

Branch: refs/heads/master
Commit: cdc5ffd79f34c3227891425987039f460a9790b8
Parents: d2300ae f80fedb
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Aug 17 11:30:30 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 17 11:30:30 2016 +0800

----------------------------------------------------------------------
 test/singa/test_snapshot.cc | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
----------------------------------------------------------------------

[03/51] [abbrv] incubator-singa git commit: SINGA-229 Complete install targets

Posted by wa...@apache.org.

SINGA-229 Complete install targets

Remove bin/ from install folder.
Move singa_config.h into include/singa/.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a0af4658
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a0af4658
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a0af4658

Branch: refs/heads/master
Commit: a0af4658c3545009c9ee8f58acd8bd8c047e4ebb
Parents: e39be3e
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Wed Aug 10 17:10:52 2016 +0800
Committer: xiezl <xi...@comp.nus.edu.sg>
Committed: Thu Aug 11 11:06:39 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a0af4658/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4661c58..32d3b8e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,8 @@ ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(test)
 ADD_SUBDIRECTORY(examples)
 
-INSTALL(DIRECTORY include/singa DESTINATION include)
-INSTALL(FILES ${CMAKE_BINARY_DIR}/include/singa/singa_config.h DESTINATION include)
+INSTALL(DIRECTORY include/singa DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
+INSTALL(FILES ${CMAKE_BINARY_DIR}/include/singa/singa_config.h DESTINATION
+    ${CMAKE_INSTALL_PREFIX}/include/singa)
 INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/lib DESTINATION ${CMAKE_INSTALL_PREFIX})
-INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/bin DESTINATION ${CMAKE_INSTALL_PREFIX})
+#INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/bin DESTINATION ${CMAKE_INSTALL_PREFIX})

[13/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Updated the comments of python files for autodoc to generate python APIs by Sphinx.

Fixed a bug in optimizer which ignored the momentum value


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8cd55300
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8cd55300
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8cd55300

Branch: refs/heads/master
Commit: 8cd55300ab30673414bbeeec7d68f1ddcd6393a2
Parents: 3299b0c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri Aug 12 14:45:41 2016 +0800
Committer: wangwei <wa...@gmail.com>
Committed: Sun Aug 14 13:47:04 2016 +0800

----------------------------------------------------------------------
 cmake/Dependencies.cmake        |   5 +-
 doc/conf.py                     |  14 +-
 doc/docs/device.rst             |  29 +-
 doc/docs/index.rst              |   6 +
 doc/docs/initializer.rst        |  12 +
 doc/docs/layer.rst              |  14 +
 doc/docs/loss.rst               |   7 +
 doc/docs/metric.rst             |   8 +
 doc/docs/optimizer.rst          |  11 +
 doc/docs/tensor.md              |   7 -
 doc/docs/tensor.rst             |  30 ++
 doc/docs/utils.rst              |   6 +
 doc/index.rst                   |  28 +-
 examples/index.rst              |   6 +
 src/python/singa/device.py      |  31 ++
 src/python/singa/initializer.py |  86 ++++-
 src/python/singa/layer.py       | 417 ++++++++++++++----------
 src/python/singa/loss.py        | 105 +++++-
 src/python/singa/metric.py      |  49 ++-
 src/python/singa/optimizer.py   | 284 ++++++++--------
 src/python/singa/tensor.py      | 608 ++++++++++++++++++++++++++++++-----
 21 files changed, 1331 insertions(+), 432 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ceef429..e533ca8 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -54,12 +54,13 @@ IF(USE_OPENCV)
     MESSAGE(STATUS "Found OpenCV_${OpenCV_VERSION} at ${OpenCV_INCLUDE_DIRS}")
     INCLUDE_DIRECTORIES(SYSTEM ${OpenCV_INCLUDE_DIRS})
     LIST(APPEND SINGA_LINKER_LIBS ${OpenCV_LIBRARIES})
-ENDIF()    
+ENDIF()
 
 #LIST(APPEND SINGA_LINKER_LIBS "/home/wangwei/local/lib/libopenblas.so")
 #MESSAGE(STATUS "link lib : " ${SINGA_LINKER_LIBS})
 
 IF(USE_PYTHON)
-    FIND_PACKAGE(PythonLibs REQUIRED)
+    FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
+    FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
     FIND_PACKAGE(SWIG 3.0 REQUIRED)
 ENDIF()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 20ba51a..9f52d16 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -19,7 +19,8 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, '../src/python/singa/')
+sys.path.insert(1, os.path.abspath('../build/python'))
+#autodoc_mock_imports = ['singa.device', 'singa.tensor', 'singa.layer']
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,9 +36,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-'sphinx.ext.autodoc'
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -50,7 +50,7 @@ source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 #
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
@@ -150,7 +150,7 @@ html_theme = 'sphinx_rtd_theme'
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -203,7 +203,7 @@ html_static_path = ['_static']
 
 # If true, links to the reST sources are added to the pages.
 #
-html_show_sourcelink = False
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
index aa5defb..53faf48 100644
--- a/doc/docs/device.rst
+++ b/doc/docs/device.rst
@@ -2,7 +2,10 @@ Device
 =======
 
 
-The Device abstract represent a hardware device with memory and compuation units.
+The Device abstract represents any hardware device with memory and compuation units.
+All [Tensor operations](tensor.html) are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimization
+of memory and execution are implemented in the Device class.
 
 Specific devices
 ----------------
@@ -13,24 +16,14 @@ Currently, SINGA has three Device implmentations,
 3. OpenclGPU for a GPU card which runs OpenCL code
 
 
-Create devices
----------------
-
 Python API
-~~~~~~~~~~
-
-.. autofunction:: device.create_cuda_gpus
-
-.. autofunction:: device.create_cuda_gpus_on
-
-.. autofunction:: device.create_cuda_gpu_on
-
-.. autofunction:: device.get_default_device
+----------
 
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
 
-The following code shows how to create devices,
 
-.. code:: python
+The following code provides examples of creating devices::
 
    from singa import device
    cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
@@ -39,9 +32,5 @@ The following code shows how to create devices,
    ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
 
 
-
 CPP API
-~~~~~~~
-
-
-
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 8a74976..2294054 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -6,4 +6,10 @@ English
    installation
    software_stack
    device
+   tensor
+   layer
+   initializer
+   loss
+   metric
+   optimizer
    examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
new file mode 100644
index 0000000..a190702
--- /dev/null
+++ b/doc/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+   :members:
+   :member-order: bysource
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/layer.rst b/doc/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/docs/loss.rst b/doc/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+   :members:
+   :show-inheritance:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/docs/metric.rst b/doc/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+   :members:
+   :show-inheritance:
+   :member-order: bysource

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/optimizer.rst b/doc/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
deleted file mode 100644
index eaf8362..0000000
--- a/doc/docs/tensor.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Tensor
-
-
-##
-
-
-##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..ff6142e
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,30 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without user
+awareness. Note that users need to make sure the tensor operands are
+allocated on the same device except copy functions.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implmentations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppGPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+
+.. automodule:: singa.tensor
+   :members:
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/docs/utils.rst b/doc/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+   :members:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
 =======================
 
 Recent News
-===========
+-----------
 
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_ 
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
 
 * The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
 
@@ -34,7 +34,7 @@ Recent News
 * SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
 
 Getting Started
-===============
+---------------
 * The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
 * Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
 
 Documentation
-=============
+-------------
 
 * Documentations are listed `here <docs.html>`_.
 
@@ -51,8 +51,8 @@ Documentation
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
-=================
-  
+----------------------
+
 * Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
 
 * If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
 More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
 
 Citing SINGA
-============
+------------
 
 Please cite the following two papers if you use SINGA in your research:
 
 * B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
 
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_). 
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
 
 .. toctree::
    :hidden:
-   
+
    downloads
    docs
 
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
    develop/how-contribute
    develop/contribute-code
    develop/contribute-docs
-   
+
 .. toctree::
    :hidden:
    :maxdepth: 2
    :caption: Community
-   
+
    community/source-repository
    community/mail-lists
    community/issue-tracking
    community/team-list
-   
+
 
 
 License
-=======
+----------
 SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
 
 Disclaimers
-===========
+-----------
 
 Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+   char-rnn/README
+   imagenet/README
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
 
 
 def create_cuda_gpus(num):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        num (int): number of device to create.
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
+
     return singa.Platform.CreateCudaGPUs(num)
 
 
 def create_cuda_gpu():
+    '''Create a single CudaGPU device.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
+
     return singa.Platform.CreateCudaGPUs(1)[0]
 
 
 def create_cuda_gpus_on(device_ids):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        device_ids (list): a list of GPU card IDs.
+
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
     return singa.Platform.CreateCudaGPUsOn(device_ids)
 
 
 def create_cuda_gpu_on(device_id):
+    '''Create a CudaGPU device on the given device ID.
+
+    Args:
+        device_id (int): GPU card ID.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
 
 def get_default_device():
+    '''Get the default host device which is a CppCPU device'''
     return singa.Platform.GetDefaultDevice()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 15caed3..277fd2f 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -15,29 +15,113 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-"""Popular initialization methods for parameter values (Tensor ojects)"""
+'''Popular initialization methods for parameter values (Tensor objects).
+
+Example usages::
+
+    from singa import tensor
+    from singa import initializer
+
+    x = tensor.Tensor((3, 5))
+    initializer.xavier(x)
+'''
 
 import math
 
 
+'''
+TODO(wangwei) update the uniform and gaussian initializers
+
+def uniform(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+    fan_out = nb_channel * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    training deep feedforward neuralnetworks.
+
+    assert fan_in >0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+    t.uniform(-x, x)
+
+
+def gaussian(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+    fan_out = nb_channel * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+
+    Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
+    Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
+
+    assert fan_in >0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+    t.gaussian(0, std)
+'''
+
+
 def uniform(t, low=0, high=1):
+    '''Initialize the parameter values following an Uniform distribution.
+
+    Args:
+        t (Tensor): the parater tensor
+        low (float): lower bound
+        high (float): higher bound
+    '''
     t.uniform(low, high)
 
 
 def gaussian(t, mean=0, std=0.01):
+    '''Initialize the parameter values following an Gaussian distribution.
+
+    Args:
+        t (Tensor): the parater tensor
+        mean (float): mean of the distribution
+        std (float): standard variance
+    '''
     t.gaussian(mean, std)
 
 
 def xavier(t):
+    '''Initialize the matrix parameter follow a Uniform distribution from
+    [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
+
+    Args:
+        t (Tensor): the parater tensor
+    '''
+
     scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1]))
     t.uniform(-scale, scale)
 
 
 def glorot(t):
+    '''Initialize the matrix parameter follow a Gaussian distribution with
+    mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
+
+    Args:
+        t (Tensor): the parater tensor
+    '''
     scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1]))
     t.gaussian(0, 1)
     t *= scale
 
 
 def msra(t):
+    '''Initialize the matrix parameter follow a Guassian distribution with
+    mean = 0, std = math.sqrt(2.0 / nb_row).
+
+    Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
+    nonlinearities.
+
+    Args:
+        t (Tensor): the parater tensor
+    '''
     t.gaussian(0, math.sqrt(2.0 / t.shape[0]))

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..0759716 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -14,7 +14,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-""" Python layers which wraps the C++ layers by providing easy to construct APIs
+""" Python layers wrap the C++ layers to provide simpler construction APIs.
+
+Example usages::
+
+    from singa import layer
+    from singa import tensor
+    from singa import device
+    from singa.model_pb2 import kTrain
+
+    layer.engine = 'cudnn'  # to use cudnn layers
+    dev = device.create_cuda_gpu()
+
+    # create a convolution layer
+    conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
+    conv.to_device(dev)  # move the layer data onto a CudaGPU device
+    x = tensor.Tensor((3, 32, 32), dev)
+    x.uniform(-1, 1)
+    y = conv.foward(kTrain, x)
+
+    dy = tensor.Tensor()
+    dy.reset_like(y)
+    dy.set_value(0.1)
+    # dp is a list of tensors for parameter gradients
+    dx, dp = conv.backward(kTrain, dy)
 """
 
 from sets import Set
@@ -22,23 +45,37 @@ from . import singa_wrap
 from .proto import model_pb2
 import tensor
 
-# engine could be 'cudnn', 'singa', which is used to create layers.
-# e.g., CudnnConvolution layer is identified by 'cudnn_convolution'
-# Convolution layer is identified by 'singa_convolution'
-# engine is case insensitive
+
 engine = 'cudnn'
+'''engine is the prefix of layer identifier.
+
+The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for
+layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively.
+For example, CudnnConvolution layer is identified by 'cudnn_convolution';
+'singacpp_convolution' is for Convolution layer;
+Some layers' implementation use only Tensor functions, thererfore they are
+transparent to the underlying devices. For threse layers, they would have
+multiple identifiers, e.g., singacpp_dropout, singacuda_dropout and
+singacl_dropout are all for the Dropout layer.
+
+engine is case insensitive. Each python layer would create the correct specific
+layer using the engine attribute.
+'''
 
 
 class Layer(object):
-    """Base Python layer class.
+    '''Base Python layer class.
 
-    Usages:
-        1.  construct layer without input_sample_shapes, goto 2;
-            construct layer with input_sample_shapes, goto 3;
+    Typically, the life cycle of a layer instance includes:
+        1. construct layer without input_sample_shapes, goto 2;
+           construct layer with input_sample_shapes, goto 3;
         2. call setup to create the parameters and setup other meta fields
         3. call forward or access layer members
         4. call backward and get parameters for update
-    """
+
+    Args:
+        name (str): layer name
+    '''
 
     def __init__(self, name, **kwargs):
         self.layer = None  # layer converted by swig
@@ -49,20 +86,24 @@ class Layer(object):
         self.has_setup = False
 
     def param_names(self):
+        '''
+        Returns:
+            a list of strings, one for the name of one parameter Tensor
+        '''
         names = []
         for x in self.param_specs:
             names.append(x['name'])
         return names
 
     def setup(self, in_shapes):
-        """Call the C++ setup function to create params and set some meta data.
+        '''Call the C++ setup function to create params and set some meta data.
 
         Args:
             in_shapes: if the layer accepts a single input Tensor, in_shapes is
                 a single tuple specifying the inpute Tensor shape; if the layer
                 accepts multiple input Tensor (e.g., the concatenation layer),
-                in_shapes is a tuple of tuples, each for one input Tensor shape
-        """
+                in_shapes is a tuple of tuples, each for one input Tensor
+        '''
         if self.has_setup:
             return
         self.layer.Setup(list(in_shapes),
@@ -70,54 +111,92 @@ class Layer(object):
         self.has_setup = True
 
     def get_output_sample_shape(self):
+        '''Called after setup to get the shape of the output sample(s).
+
+        Returns:
+            a tuple for a single output Tensor or a list of tuples if this layer
+            has multiple outputs
+        '''
         assert self.has_setup, \
             'Must call setup() before get_output_sample_shape()'
         return self.layer.GetOutputSampleShape()
 
     def param_values(self):
-        """Return param value tensors.
+        '''Return param value tensors.
 
-        Do not store these tensors as layer members because cpp Tensor could be
-        moved onto diff devices due to the change of layer device. However, the
-        py tensors would not update its internal cpp tensor automatically.
-        """
+        Parameter tensors are not stored as layer members because cpp Tensor
+        could be moved onto diff devices due to the change of layer device,
+        which would result in inconsistency.
+
+        Returns:
+            a list of tensors, one for each paramter
+        '''
         return tensor.from_raw_tensors(self.layer.param_values())
 
-    def forward(self, flag, input):
+    def forward(self, flag, x):
         '''Forward propagate through this layer.
 
         Args:
-            flag, kTrain or kEval
-            input, an input tensor
+            flag (int): kTrain or kEval
+            x (Tensor or list<Tensor>): an input tensor if the layer is
+                connected from a single layer; a list of tensors if the layer
+                is connected from multiple layers.
 
         Return:
-            a tensor for the transformed feature
+            a tensor if the layer is connected to a single layer; a list of
+            tensors if the layer is connected to multiple layers;
         '''
         assert self.has_setup, 'Must call setup() before forward()'
-        assert isinstance(input, tensor.Tensor), 'input must be py Tensor'
-        y = self.layer.Forward(flag, input.singa_tensor)
-        return tensor.from_raw_tensor(y)
+        if type(x) == list:
+            xs = []
+            for t in x:
+                x.append(t.singa_tensor)
+        else:
+            assert isinstance(input, tensor.Tensor), \
+                'input must be a Tensor or a list of Tensor'
+            xs = x
+        y = self.layer.Forward(flag, xs)
+        if type(y) == list:
+            return tensor.from_raw_tensors(y)
+        else:
+            return tensor.from_raw_tensor(y)
 
-    def backward(self, flag, grad):
-        '''Backward propagate through this layer.
+    def backward(self, flag, dy):
+        '''Backward propagate gradients through this layer.
 
         Args:
-            flag, for future use.
-            grad, gradient of the returned values of the forward function.
-
+            flag (int): for future use.
+            dy (Tensor or list<Tensor>): the gradient tensor(s) y w.r.t the
+                objective loss
         Return:
-            <dx, <dp1, dp2..>>, dx is the gradient of the input of the
-            forward function, dpi is the gradient of the i-th parameter
+            <dx, <dp1, dp2..>>, dx is a (set of) tensor(s) for the gradient of x
+            , dpi is the gradient of the i-th parameter
         '''
-        assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor'
-        ret = self.layer.Backward(flag, grad.singa_tensor)
-        return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1])
+        if type(dy) == list:
+            dys = []
+            for t in dy:
+                dys.append(t.singa_tensor)
+        else:
+            assert isinstance(dy, tensor.Tensor), \
+                'the input must be a Tensor or a set of Tensor'
+            dys = dy.singa_tensor
+        ret = self.layer.Backward(flag, dys)
+        if type(ret[0]) == list:
+            dxs = tensor.from_raw_tensors(ret[0])
+        else:
+            dxs = tensor.from_raw_tensor(ret[0])
+        return dxs, tensor.from_raw_tensors(ret[1])
 
     def to_device(self, device):
+        '''Move layer state tensors onto the given device.
+
+        Args:
+            device: swig converted device, created using singa.device
+        '''
         self.layer.ToDevice(device)
 
     def as_type(self, dtype):
-        self.layer.AsType(dtype)
+        pass
 
     def __copy__(self):
         pass
@@ -127,43 +206,42 @@ class Layer(object):
 
 
 class Conv2D(Layer):
+    """Construct a layer for 2D convolution.
 
+    Args:
+        nb_kernels (int): num of the channels (kernels) of the input Tensor
+        kernel: an integer or a pair of integers for kernel height and width
+        stride: an integer or a pair of integers for stride height and width
+        border_mode (string): padding mode, case in-sensitive,
+            'valid' -> padding is 0 for height and width
+            'same' -> padding is half of the kernel (floor), the kernel must be
+            odd number.
+        cudnn_prefer (string): the preferred algorithm for cudnn convolution
+            which could be 'fatest', 'autotune', 'limited_workspace' and
+            'no_workspace'
+        data_format (string): either 'NCHW' or 'NHWC'
+        use_bias (bool): True or False
+        pad: an integer or a pair of integers for padding height and width
+        W_specs (dict): used to specify the weight matrix specs, fields
+            include,
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            TODO(wangwei) 'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): hyper-parameters for bias vector, similar as W_specs
+        name (string): layer name.
+        input_sample_shape: 3d tuple for the shape of the input Tensor
+            without the batchsize, e.g., (channel, height, width) or
+            (height, width, channel)
+    """
     def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
                  cudnn_prefer='fatest', data_format='NCHW',
                  use_bias=True, W_specs=None, b_specs=None,
                  pad=None, input_sample_shape=None):
-        """Construct a layer for 2D convolution.
-
-        Args:
-            nb_kernels (int): num of the channels (kernels) of the input Tensor
-            kernel: an integer or a pair of integers for kernel height and width
-            stride: an integer or a pair of integers for stride height and width
-            border_mode (string): padding mode, case in-sensitive,
-                'valid' -> padding is 0 for height and width
-                'same' -> padding is half of the kernel (floor),
-                    the kernel must be odd number.
-            cudnn_prefer (string): the preferred algorithm for cudnn convolution
-                which could be 'fatest', 'autotune', 'limited_workspace' and
-                'no_workspace'
-            data_format (string): either 'NCHW' or 'NHWC'
-            use_bias (bool): True or False
-            pad: an integer or a pair of integers for padding height and width
-            W_specs (dict): used to specify the weight matrix specs, fields
-                include,
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                TODO(wangwei) 'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): hyper-parameters for bias vector, similar as W_specs
-            name (string): layer name.
-            input_sample_shape: 3d tuple for the shape of the input Tensor
-                without the batchsize, e.g., (channel, height, width) or
-                (height, width, channel)
-        """
         super(Conv2D, self).__init__(name)
         assert data_format == 'NCHW', 'Not supported data format: %s ' \
             'only "NCHW" is enabled currently' % (data_format)
@@ -195,19 +273,19 @@ class Conv2D(Layer):
 
 
 class Conv1D(Conv2D):
+    """Construct a layer for 1D convolution.
+
+    Most of the args are the same as those for Conv2D except the kernel,
+    stride, pad, which is a scalar instead of a tuple.
+    input_sample_shape is a tuple with a single value for the input feature
+    length
+    """
 
     def __init__(self, name, nb_kernels, kernel=3, stride=1,
                  border_mode='same', cudnn_prefer='fatest',
                  use_bias=True, W_specs={'init': 'Xavier'},
                  b_specs={'init': 'Constant', 'value': 0}, pad=None,
                  input_sample_shape=None):
-        """Construct a layer for 1D convolution.
-
-        Most of the args are the same as those for Conv2D except the kernel,
-        stride, pad, which is a scalar instead of a tuple.
-        input_sample_shape is a tuple with a single value for the input feature
-        length
-        """
         pad = None
         if pad is not None:
             pad = (0, pad)
@@ -227,7 +305,15 @@ class Conv1D(Conv2D):
 
 
 class Pooling2D(Layer):
+    '''2D pooling layer providing max/avg pooling.
+
+    All args are the same as those for Conv2D, except the following one
 
+    Args:
+        mode: pooling type, model_pb2.PoolingConf.MAX or
+            model_pb2.PoolingConf.AVE
+
+    '''
     def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
                  pad=None, data_format='NCHW', input_sample_shape=None):
         super(Pooling2D, self).__init__(name)
@@ -312,28 +398,26 @@ class AvgPooling1D(AvgPooling2D):
 
 
 class BatchNormalization(Layer):
-    # TODO(wangwei) add mode and epsilon arguments
+    """Batch-normalization.
 
+    Args:
+        momentum (float): for running average mean and variance.
+        beta_specs (dict): dictionary includes the fields for the beta
+            param:
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        gamma_specs (dict): similar to beta_specs, but for the gamma param.
+        name (string): layer name
+        input_sample_shape (tuple): with at least one integer
+    """
     def __init__(self, name, momentum=0.9,
                  beta_specs=None, gamma_specs=None, input_sample_shape=None):
-        """Batch-normalization.
-
-        Args:
-            momentum (float): for running average mean and variance.
-            beta_specs (dict): dictionary includes the fields for the beta
-                param:
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            gamma_specs (dict): similar to beta_specs, but for the gamma param.
-            name (string): layer name
-            input_sample_shape (tuple): with at least one integer
-        """
         super(BatchNormalization, self).__init__(name)
         conf = self.conf.batchnorm_conf
         conf.factor = momentum
@@ -362,16 +446,17 @@ class BatchNormalization(Layer):
 
 
 class LRN(Layer):
+    """Local response normalization.
+
+    Args:
+        size (int): # of channels to be crossed
+            normalization.
+        mode (string): 'cross_channel'
+        input_sample_shape (tuple): 3d tuple, (channel, height, width)
+    """
+
     def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
                  k=1, input_sample_shape=None):
-        """Local response normalization.
-
-        Args:
-            size (int): # of channels to be crossed
-                normalization.
-            mode (string): 'cross_channel'
-            input_sample_shape (tuple): 3d tuple, (channel, height, width)
-        """
         super(LRN, self).__init__(name)
         conf = self.conf.lrn_conf
         conf.local_size = size
@@ -388,29 +473,28 @@ class LRN(Layer):
 
 
 class Dense(Layer):
+    """Apply linear/affine transformation, also called inner-product or
+    fully connected layer.
 
+    Args:
+        num_output (int): output feature length.
+        use_bias (bool): add a bias vector or not to the transformed feature
+        W_specs (dict): specs for the weight matrix
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): specs for the bias vector, same fields as W_specs.
+        W_transpose (bool): if true, output=x*W.T+b;
+        input_sample_shape (tuple): input feature length
+    """
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
                  W_transpose=True, input_sample_shape=None):
-        """Apply linear/affine transformation, also called inner-product or
-        fully connected layer.
-
-        Args:
-            num_output (int): output feature length.
-            use_bias (bool): add a bias vector or not to the transformed feature
-            W_specs (dict): specs for the weight matrix
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): specs for the bias vector, same fields as W_specs.
-            W_transpose (bool): if true, output=x*W.T+b;
-            input_sample_shape (tuple): input feature length
-        """
         super(Dense, self).__init__(name)
         conf = self.conf.dense_conf
         conf.num_output = num_output
@@ -435,14 +519,14 @@ class Dense(Layer):
 
 
 class Dropout(Layer):
+    """Droput layer.
 
-    def __init__(self, name, p=0.5, input_sample_shape=None):
-        """Droput layer.
+    Args:
+        p (float): probability for dropping out the element, i.e., set to 0
+        name (string): layer name
+    """
 
-        Args:
-            p (float): probability for dropping out the element, i.e., set to 0
-            name (string): layer name
-        """
+    def __init__(self, name, p=0.5, input_sample_shape=None):
         super(Dropout, self).__init__(name)
         conf = self.conf.dropout_conf
         conf.dropout_ratio = p
@@ -456,15 +540,14 @@ class Dropout(Layer):
 
 
 class Activation(Layer):
+    """Activation layers.
 
+    Args:
+        name (string): layer name
+        mode (string): 'relu', 'sigmoid', or 'tanh'
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, mode='relu', input_sample_shape=None):
-        """Activation layers.
-
-        Args:
-            name (string): layer name
-            mode (string): 'relu', 'sigmoid', or 'tanh'
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Activation, self).__init__(name)
         self.conf.type = (engine + '_' + mode).lower()
         _check_engine(engine, ['cudnn', 'singa'])
@@ -474,15 +557,14 @@ class Activation(Layer):
 
 
 class Softmax(Layer):
+    """Apply softmax.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Apply softmax.
-
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
@@ -493,14 +575,14 @@ class Softmax(Layer):
 
 
 class Flatten(Layer):
+    """Reshape the input tensor into a matrix.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape for a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Reshape the input tensor into a matrix.
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape for a single sample
-        """
         super(Flatten, self).__init__(name)
         conf = self.conf.flatten_conf
         conf.axis = axis
@@ -511,26 +593,27 @@ class Flatten(Layer):
 
 
 class RNN(Layer):
+    '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
+
+    Args:
+        hidden_size: hidden feature size, the same for all stacks of layers.
+        rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru',
+            'tanh' and 'relu', refer to cudnn manual for each mode.
+        num_stacks: num of stacks of rnn layers. It is different to the
+            unrolling seqence length.
+        input_mode: 'linear' convert the input feature x by by a linear
+            transformation to get a feature vector of size hidden_size;
+            'skip' does nothing but requires the input feature size equals
+            hidden_size
+        bidirection: True for bidirectional RNN
+        param_specs: config for initializing the RNN parameters.
+        input_sample_shape: includes a single integer for the input sample
+            feature size.
+    '''
+
     def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
                  num_stacks=1, input_mode='linear', bidirectional=False,
                  param_specs=None, input_sample_shape=None):
-        '''Wrapper for singa::RNN class.
-
-        Args:
-            hidden_size, hidden feature size, the same for all stacks of layers.
-            rnn_mode, decides the rnn unit, which could be one of 'lstm', 'gru',
-                'tanh' and 'relu', refer to cudnn manual for each mode.
-            num_stacks, num of stacks of rnn layers. It is different to the
-                unrolling seqence length.
-            input_mode, 'linear' convert the input feature x by by a linear
-                transformation to get a feature vector of size hidden_size;
-                'skip' does nothing but requires the input feature size equals
-                hidden_size
-            bidirection, True for bidirectional RNN
-            param_specs, config for initializing the RNN parameters.
-            input_sample_shape, includes a single integer for the input sample
-                feature size.
-        '''
         super(RNN, self).__init__(name)
         conf = self.conf.rnn_conf
         assert hidden_size > 0, 'Hidden feature size must > 0'
@@ -605,7 +688,7 @@ class RNN(Layer):
 
         Returns:
             <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
-            the i-th input, its shape is (batch_size,
+                the i-th input, its shape is (batch_size,
                 input_feature_length). dhx is the gradient for the initial
                 hidden state. dcx is the gradient for the initial cell state,
                 which is valid only for lstm.
@@ -741,5 +824,7 @@ def _construct_param_specs_from_dict(specs):
 
 
 def get_layer_list():
-    """ Return a list of strings reprensenting the all supported layers"""
+    """ Return a list of strings which include the identifiers (tags) of all
+    supported layers
+    """
     return singa_wrap.GetRegisteredLayers()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py
index acfb813..c88290b 100644
--- a/src/python/singa/loss.py
+++ b/src/python/singa/loss.py
@@ -15,32 +15,127 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+
+'''
+Loss module includes a set of training loss implmentations. Some are converted
+from C++ implementation, and the rest are implemented directly using python
+Tensor.
+
+Example usage::
+
+    from singa import tensor
+    from singa import loss
+    from singa.proto import model_pb2
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly genearte the prediction activation
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = loss.SoftmaxCrossEntropy()
+    l = f.forward(model_pb2.kTrain, x, y)  # l is tensor with 3 loss values
+    g = f.backward()  # g is a tensor containing all gradients of x w.r.t l
+'''
+
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Loss(object):
+    '''Base loss class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited foward,
+    backward, and evaluate functions of this base class. Other subclasses need
+    to override these functions
+    '''
 
     def __init__(self):
         self.swig_loss = None
 
     def forward(self, flag, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the loss values.
+
+        Args:
+            flag (int): kTrain or kEval. If it is kTrain, then the backward
+                function must be called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truch Tensor, x.shape[0] must = y.shape[0]
+
+        Returns:
+            a tensor of floats for the loss values, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
 
     def backward(self):
-        """Return the grad of x w.r.t. the loss obj"""
+        '''
+        Returns:
+            the grad of x w.r.t. the loss
+        '''
         return tensor.from_raw_tensor(self.swig_loss.Backward())
 
-    def evaluate(self, flag, x, y):
-        """Return the averaged loss for all samples in x"""
+    def evaluate(self, flag, x, y):  # TODO(wangwei) remove flag
+        '''
+        Args:
+            flag (int): must be kEval, to be removed
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tnesor
+
+        Returns:
+            the averaged loss for all samples in x.
+        '''
         return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
 
 
 class SoftmaxCrossEntropy(Loss):
+    '''This loss function is a combination of SoftMax and Cross-Entropy loss.
+
+    It converts the inputs via SoftMax function and then
+    computes the cross-entropy loss against the ground truth values.
+    '''
 
     def __init__(self):
         self.swig_loss = singa.SoftmaxCrossEntropy()
+
+
+class SquaredError(Loss):
+    '''This loss evaluates the squared error between the prediction and the
+    truth values.
+
+    It is implemented using Python Tensor operations.
+    '''
+    def __init__(self):
+        super(Loss, SquaredError).__init__()
+        self.err = None
+
+    def forward(self, flag, x, y):
+        '''Compute the error as 0.5 * ||x-y||^2.
+
+        Args:
+            flag (int): kTrain or kEval; if kTrain, then the backward must be
+                called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the truth Tensor, an integer value per sample, whose
+                value is [0, x.shape[1])
+
+        Returns:
+            a Tensor with one error value per sample
+        '''
+        self.err = x - y
+        return 0.5 * tensor.squared(self.err)
+
+    def backward(self):
+        '''Compute the gradient of x w.r.t the error.
+
+        Returns:
+            x - y
+        '''
+        return self.err
+
+    def evaluate(self, flag, x, y):
+        '''Compuate the averaged error.
+
+        Returns:
+            a float value as the averaged error
+        '''
+        return tensor.sum(0.5 * tensor.squared(x - y)) / x.size()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/metric.py
----------------------------------------------------------------------
diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py
index 31b6892..3a5750d 100644
--- a/src/python/singa/metric.py
+++ b/src/python/singa/metric.py
@@ -15,28 +15,71 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of metric classes for evaluating the model's
+performance. The specific metric classes could be converted from C++
+implmentation or implemented directly using Python.
+
+
+Example usage::
+
+    from singa import tensor
+    from singa import metric
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly genearte the prediction activation
+    x = tensor.SoftMax(x)  # normalize the prediction into probabilities
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = metric.Accuracy()
+    acc = f.evaluate(x, y)  # averaged accuracy over all 3 samples in x
+
+'''
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Metric(object):
+    '''Base metric class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited foward,
+    and evaluate functions of this base class. Other subclasses need
+    to override these functions. Users need to feed in the **predictions** and
+    ground truth to get the metric values.
+    '''
 
     def __init__(self):
         self.swig_metric = None
 
     def forward(self, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the metric for each sample.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+
+        Returns:
+            a tensor of floats, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_metric.Forward(x.singa_tensor, y.singa_tensor))
 
     def evaluate(self, x, y):
-        """Return the averaged metric for all samples in x"""
+        '''Compute the averaged metric over all samples.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+        Returns:
+            a float value for the averaged metric
+        '''
         return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor)
 
 
 class Accuracy(Metric):
+    '''Compute the top one accuracy for singel label prediction tasks.
 
+    It calls the C++ functions to do the calculation.
+    '''
     def __init__(self):
         self.swig_metric = singa.Accuracy()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 503527f..5d38997 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -15,7 +15,22 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of optimizers for updating model parameters.
+
+Example usage::
+
+  from singa import optimizer
+  from singa import tensor
+
+  sgd = optimizer.SGD(lr=0.01, momentum=0.9, weight_decay=1e-4)
+  p = tensor.Tensor((3,5))
+  p.uniform(-1, 1)
+  g = tensor.Tensor((3,5))
+  g.gaussian(0, 0.01)
+
+  sgd.apply(1, g, p, 'param')  # use the global lr=0.1 for epoch 1
+  sgd.apply_with_lr(2, 0.03, g, p, 'param')  # use lr=0.03 for epoch 2
+'''
 
 from . import singa_wrap as singa
 import tensor
@@ -23,53 +38,44 @@ from proto import model_pb2
 
 
 class Optimizer(object):
-    """Base python optimizer.
-
-    Usages:
-        1. construct the optimizer
-        2. (optional) register each parameter with its specs.
-        3. use the optimizer to update parameter values given parameter
-            gradients and other optional info
-    """
-
+    '''The base python optimizer class.
+
+    Typically, an optimizer is used as follows:
+
+    1. construct the optimizer
+    2. (optional) register each parameter with its specs.
+    3. use the optimizer to update parameter values given parameter
+        gradients and other optional info
+
+    The subclasses should override the apply_with_lr function to do the real
+    parameter udpate.
+
+    Args:
+        lr (float): a constant for the learning rate, mutually exclusive with
+            'lr_gen'.
+        momentum (float): a constant for the momentum value
+        decay (float): the coefficent for L2 regularizer, which is mutually
+            exclusive with 'regularizer'.
+        lr_gen (function): a function returns the learning rate given
+            the current training step/epoch. It is mutually exclusive with lr.
+            If both are not set, the apply_with_lr function should be used for
+            param updating.
+        regularizer: an instance of Regularizer or RegularizerConf; If set,
+            regularization would be applied in apply_with_lr().
+            Users can also do regularization outside.
+        constraint: an instance of Constraint or ConstraintConf; If set,
+            constraint would be applied inside apply_with_lr(). Users can
+            also do regularization outside.
+    '''
     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
-                 momentum_gen=None, regularizer=None, constraint=None):
-        """Constructor.
-
-        Args:
-            lr: a constant or a function that generates learning rate given a
-                step, which is mutually exclusive with 'lr_gen'.
-            momentum: a constant or a function that generates the momentum value
-                given a step.
-            decay (float): the coefficent for L2 regularizer, which is mutually
-                exclusive with 'regularizer'.
-            lr_gen (function): a function returns the learning rate given
-                the current training step. It is mutually exclusive with lr. If
-                both are not set, the apply_with_lr function should be used for
-                param updating.
-            momentum_gen (function): a function returns the momentum value given
-                the current training step. It is mutually exclusive with
-                momentum.
-            regularizer: an instance of Regularizer or RegularizerConf; If set,
-                regularization would be applied in apply_with_lr().
-                Users can also do regularization outside.
-            constraint: an instance of Constraint or ConstraintConf; If set,
-                constraint would be applied inside apply_with_lr(). Users can
-                also do regularization outside.
-        """
+                 regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
 
-            def lr_gen(step):
+            def lr_gen(epoch):
                 return lr
         self.lr_gen = lr_gen
-        if momentum is not None:
-            assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\
-                ' at the same time'
-
-            def momentum_gen(step):
-                return momentum
-        self.momentum_gen = momentum_gen
+        self.momentum = momentum
         if decay is not None:
             assert regularizer is None, \
                 'Cannot set decay and regularizer at the same time'
@@ -94,14 +100,15 @@ class Optimizer(object):
         self.learning_rate_multiplier = {}
 
     def register(self, name, specs):
-        """Register the param specs, including creating regularizer and
+        '''Register the param specs, including creating regularizer and
         constraint per param object. Param specific regularizer and constraint
         have higher priority than the global ones.
 
         Args:
             name (str): parameter name
-            specs (ParamSpec): protobuf obj
-        """
+            specs (ParamSpec): protobuf obj, including regularizer and
+                constraint, multipliers for learning rate and weight decay.
+        '''
         if specs.HasField('regularizer'):
             self.regularizers[name] = CppRegularizer(specs.constraint)
         if specs.HasField('constraint'):
@@ -111,8 +118,8 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, step=None):
-        """Apply regularization and constraint if available.
+    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+        '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
         regularizer (constraint), it would use the param specific one.
@@ -121,46 +128,48 @@ class Optimizer(object):
             value (Tensor): parameter value Tensor
             grad (Tensor): parameter gradient Tensor
             name (string): to get parameter specific regularizer or constraint
-            step (int): some regularizer or constraint would use step
+            epoch (int): some regularizer or constraint would use epoch
 
-        Return:
+        Returns:
             the updated gradient Tensor
-        """
+        '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, step)
+            self.constraints[name].apply(value, grad, epoch)
         elif self.constraint is not None:
-            self.constraint.apply(step, value, grad)
+            self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, step)
+            self.regularizers[name].apply(value, grad, epoch)
         elif self.regularizer is not None:
-            self.regularizer.apply(step, value, grad)
+            self.regularizer.apply(epoch, value, grad)
         return grad
 
-    def apply_with_lr(self, step, lr, grad, value, name=None):
-        """Do update with given learning rate.
+    def apply_with_lr(self, epoch, lr, grad, value, name=None):
+        '''Do update with given learning rate.
 
         The subclass optimizer must override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): training epoch (could be iteration or epoch)
             lr (float): learning rate
             grad (Tensor): parameter gradient
             value (Tesnor): parameter value
             name (string): paramter name to retrieval parameter specific
                 updating rules (including regularizer and constraint)
 
-        Return:
+        Returns:
             updated parameter value
-        """
+        '''
         assert False, 'This is the base function, pls call the subclass func'
         return value
 
-    def apply(self, step, grad, value, name=None):
-        """Do update assume the learning rate generator is set.
+    def apply(self, epoch, grad, value, name=None):
+        '''Do update assuming the learning rate generator is set.
 
         The subclass optimizer does not need to override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): training epoch (could be iteration or epoch)
             grad (Tensor): parameter gradient
             value (Tesnor): parameter value
             name (string): paramter name to retrieval parameter specific
@@ -168,97 +177,109 @@ class Optimizer(object):
 
         Return:
             updated parameter value
-        """
-
+        '''
         assert self.lr_gen is not None, 'Learning rate generator is not set.'\
             'Either set the lr_gen in constructor or call apply_with_lr'
-        lr = self.lr_gen(step)
-        return self.apply_with_lr(step, lr, grad, value, name)
+        lr = self.lr_gen(epoch)
+        return self.apply_with_lr(epoch, lr, grad, value, name)
 
 
 class SGD(Optimizer):
+    '''The vallina Stochasitc Gradient Descent algorithm with momentum.
 
-    def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
-        """The vallina Stochasitc Gradient Descent algorithm.
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(SGD, self).__init__(lr, momentum, decay)
+    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                  constraint)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = self.momentum
+        conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Nesterov(Optimizer):
+    '''The SGD with Nesterov momentum.
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs):
-        """The SGD with Nesterov momentum
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
+    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                       constraint)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = momentum
+        conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class AdaGrad(Optimizer):
+    '''AdaGrad optimizer.
 
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """AdaGrad optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            epsilon (float): small number for preventing numeric error.
-        """
-        super(RMSProp, self).__init__(lr, decay, **kwargs)
+    Args:
+        epsilon (float): small number for preventing numeric error.
+    '''
+    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+                                      constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
+        conf.type = 'adagrad'
         self.opt = singa.CreateOptimizer('AdaGrad')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class RMSProp(Optimizer):
+    '''RMSProp optimizer.
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """RMSProp optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            rho (float): float within [0, 1]
-            epsilon (float): small value for preventing numeric error
-        """
-        super(RMSProp, self).__init__(lr, decay, kwargs)
+    Args:
+        rho (float): float within [0, 1]
+        epsilon (float): small value for preventing numeric error
+    '''
+
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+                                      constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho
         conf.delta = epsilon
         self.opt = singa.CreateOptimizer('RMSProp')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Regularizer(object):
-    """Base Python regularizer for parameter gradients.
-    """
+    '''Base Python regularizer for parameter gradients.'''
 
     def apply(self, value, grad):
         assert False, 'Not Implemented. Call the subclass function.'
@@ -266,34 +287,32 @@ class Regularizer(object):
 
 
 class CppRegularizer(Regularizer):
-    """Wrapper for regularizer implemented using C++.
-    """
+    '''Wrapper for regularizer implemented using C++.
 
-    def __init__(self, conf):
-        """Constructor.
+    Args:
+        conf (RegularizerConf): protobuf message for the configuration.
+    '''
 
-        Args:
-            conf (RegularizerConf): protobuf message for the configuration.
-        """
+    def __init__(self, conf):
         self.reg = singa.CreateRegularizer(conf.type)
         self.reg.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.reg.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Regularizer(Regularizer):
-    """L2 regularization"""
+    '''L2 regularization
+
+    Args:
+        coefficient (float): regularization coefficient.
+    '''
 
     def __init__(self, coefficient):
-        """
-        Args:
-            coefficient (float): regularization coefficient.
-        """
         self.coefficient = coefficient
 
-    def apply(self, step, value, grad, coefficient=None):
+    def apply(self, epoch, value, grad, coefficient=None):
         if coefficient is None:
             assert self.coefficient is not None, 'Must set the coefficient'
             coefficient = self.coefficient
@@ -302,39 +321,34 @@ class L2Regularizer(Regularizer):
 
 
 class Constraint(object):
-    """Base Python constraint class for paramter gradients.
-    """
+    '''Base Python constraint class for paramter gradients'''
 
-    def apply(self, step, value, grad):
+    def apply(self, epoch, value, grad):
         return grad
 
 
 class CppConstraint(Constraint):
-    """Wrapper for constraints implemented using C++.
-    """
+    '''Wrapper for constraints implemented using C++.
 
+    Args:
+        conf (ConstraintConf): protobuf message for the configuration.
+    '''
     def __init__(self, conf):
-        """Constructor.
-
-        Args:
-            conf (ConstraintConf): protobuf message for the configuration.
-        """
         self.constraint = singa.CreateConstraint(conf.type)
         self.constraint.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Constraint(Constraint):
-    """Rescale the gradient to make the L2 norm <= a given threshold.
-    """
+    '''Rescale the gradient to make the L2 norm <= a given threshold'''
 
     def __init__(self, threshold=None):
         self.threshold = threshold
 
-    def apply(self, step, value, grad, threshold=None):
+    def apply(self, epoch, value, grad, threshold=None):
         if threshold is None:
             assert self.threshold is not None, 'Must set the threshold'
             threshold = self.threshold

[08/51] [abbrv] incubator-singa git commit: Merge PR #235 which fixed a bug in batchnorm.cc

Posted by wa...@apache.org.

Merge PR #235 which fixed a bug in batchnorm.cc


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/94ffe55d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/94ffe55d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/94ffe55d

Branch: refs/heads/master
Commit: 94ffe55daf1253431b6576e211689a0ad7d30b72
Parents: 6b2ff3c 055ff17
Author: Wei Wang <wa...@gmail.com>
Authored: Fri Aug 12 10:40:18 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Fri Aug 12 10:57:32 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                     |  2 +-
 examples/cifar10/train.py          |  2 +-
 examples/cifar10/vgg.py            | 16 ++++++++--------
 src/model/layer/cudnn_batchnorm.cc | 16 ++++++++--------
 test/singa/test_cudnn_batchnorm.cc | 22 ++++++++--------------
 5 files changed, 26 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/94ffe55d/CMakeLists.txt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/94ffe55d/examples/cifar10/train.py
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/94ffe55d/examples/cifar10/vgg.py
----------------------------------------------------------------------

[22/51] [abbrv] incubator-singa git commit: SINGA-210 Enable checkpoint and resume for v1.0

Posted by wa...@apache.org.

SINGA-210 Enable checkpoint and resume for v1.0

comment out read & write double type testcase in test_snapshot.cc
to make test passed on Mac OS,
because CopyDataFromHostPtr() method in tensor class
cannot handle double type temporarily.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f80fedb8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f80fedb8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f80fedb8

Branch: refs/heads/master
Commit: f80fedb8245b93f6945f12e3e42a2e175aa1fdcc
Parents: 1db2784
Author: WANG Ji <ij...@gmail.com>
Authored: Mon Aug 15 14:31:09 2016 +0800
Committer: WANG Ji <ij...@gmail.com>
Committed: Mon Aug 15 14:31:09 2016 +0800

----------------------------------------------------------------------
 test/singa/test_snapshot.cc | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f80fedb8/test/singa/test_snapshot.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_snapshot.cc b/test/singa/test_snapshot.cc
index e83145b..33b57b9 100644
--- a/test/singa/test_snapshot.cc
+++ b/test/singa/test_snapshot.cc
@@ -30,8 +30,10 @@
 const std::string prefix = "./snapshot_test";
 const float param_1_data[] = {0.1, 0.2, 0.3, 0.4};
 const float param_2_data[] = {0.2, 0.1, 0.4, 0.3};
-const std::string desc_1 = "parameter name: Param_1\tdata type: 0\tdim: 1\tshape: 4";
-const std::string desc_2 = "parameter name: Param_2\tdata type: 0\tdim: 2\tshape: 2 2";
+const std::string desc_1 =
+    "parameter name: Param_1\tdata type: 0\tdim: 1\tshape: 4";
+const std::string desc_2 =
+    "parameter name: Param_2\tdata type: 0\tdim: 2\tshape: 2 2";
 const int int_data[] = {1, 3, 5, 7};
 const double double_data[] = {0.2, 0.4, 0.6, 0.8};
 
@@ -63,7 +65,7 @@ TEST(Snapshot, ReadTest) {
   const float* data_2 = param_2.data<float>();
   for (size_t i = 0; i < singa::Product(shape2); ++i)
     EXPECT_FLOAT_EQ(data_2[i], param_2_data[i]);
-  std::ifstream desc_file(prefix+".desc");
+  std::ifstream desc_file(prefix + ".desc");
   std::string line;
   getline(desc_file, line);
   EXPECT_EQ(line, desc_1);
@@ -73,7 +75,8 @@ TEST(Snapshot, ReadTest) {
 
 TEST(Snapshot, ReadIntTest) {
   {
-    singa::Snapshot int_snapshot_write(prefix+".int", singa::Snapshot::kWrite);
+    singa::Snapshot int_snapshot_write(prefix + ".int",
+                                       singa::Snapshot::kWrite);
     singa::Tensor int_param(singa::Shape{4});
     int_param.AsType(singa::kInt);
     int_param.CopyDataFromHostPtr(int_data, 4);
@@ -81,7 +84,7 @@ TEST(Snapshot, ReadIntTest) {
   }
 
   {
-    singa::Snapshot int_snapshot_read(prefix+".int", singa::Snapshot::kRead);
+    singa::Snapshot int_snapshot_read(prefix + ".int", singa::Snapshot::kRead);
     singa::Shape shape;
     shape = int_snapshot_read.ReadShape("IntParam");
     EXPECT_EQ(shape.size(), 1u);
@@ -93,9 +96,11 @@ TEST(Snapshot, ReadIntTest) {
   }
 }
 
+/*
 TEST(Snapshot, ReadDoubleTest) {
   {
-    singa::Snapshot double_snapshot_write(prefix+".double", singa::Snapshot::kWrite);
+    singa::Snapshot double_snapshot_write(prefix + ".double",
+                                          singa::Snapshot::kWrite);
     singa::Tensor double_param(singa::Shape{4});
     double_param.AsType(singa::kDouble);
     double_param.CopyDataFromHostPtr(double_data, 4);
@@ -103,7 +108,8 @@ TEST(Snapshot, ReadDoubleTest) {
   }
 
   {
-    singa::Snapshot double_snapshot_read(prefix+".double", singa::Snapshot::kRead);
+    singa::Snapshot double_snapshot_read(prefix + ".double",
+                                         singa::Snapshot::kRead);
     singa::Shape shape;
     shape = double_snapshot_read.ReadShape("DoubleParam");
     EXPECT_EQ(shape.size(), 1u);
@@ -114,3 +120,4 @@ TEST(Snapshot, ReadDoubleTest) {
       EXPECT_EQ(param_data[i], double_data[i]);
   }
 }
+*/

[38/51] [abbrv] incubator-singa git commit: fix minor bug

Posted by wa...@apache.org.

fix minor bug

1. in pool.cc due to buf check, which should be done only for average pooling
2. update the tensor.py to avoid error in to_numpy() due to
inconsistency of swig tensor and py tensor members, e.g. device/shape.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/22889bc5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/22889bc5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/22889bc5

Branch: refs/heads/master
Commit: 22889bc5e5537c2ecc1607d01eb3bdb19fbaa7dc
Parents: 0a76425
Author: Wei Wang <wa...@gmail.com>
Authored: Tue Aug 16 15:18:07 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Tue Aug 16 15:39:39 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py | 36 +++++++++++++++++++-----------------
 src/model/layer/pooling.cc | 17 +++++++++--------
 src/python/singa/tensor.py | 16 ++++++++++------
 3 files changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 137df80..d28646e 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -98,7 +98,7 @@ def get_lr(epoch):
 
 
 def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
-          num_stacks=1, dropout=0.5, model_path='model.bin'):
+          num_stacks=1, dropout=0.5, model_path='model'):
     # SGD with L2 gradient normalization
     opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
@@ -194,22 +194,24 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
         print 'Epoch %d, evaluation loss is %f' % \
             (epoch, eval_loss / data.num_test_batch / seq_length)
 
-    # checkpoint the file model
-    with open(model_path, 'wb') as fd:
-        print 'saving model to %s' % model_path
-        d = {}
-        for name, w in zip(
-                ['rnn_w', 'dense_w', 'dense_b'],
-                [rnn_w, dense_w, dense_b]):
-            w.to_host()
-            d[name] = tensor.to_numpy(w)
-        d['idx_to_char'] = data.idx_to_char
-        d['char_to_idx'] = data.char_to_idx
-        d['hidden_size'] = hidden_size
-        d['num_stacks'] = num_stacks
-        d['dropout'] = dropout
-
-        pickle.dump(d, fd)
+        if (epoch + 1) % 30 == 0:
+            # checkpoint the file model
+            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
+                print 'saving model to %s' % model_path
+                d = {}
+                for name, w in zip(
+                        ['rnn_w', 'dense_w', 'dense_b'],
+                        [rnn_w, dense_w, dense_b]):
+                    w.to_host()
+                    d[name] = tensor.to_numpy(w)
+                    w.to_device(cuda)
+                d['idx_to_char'] = data.idx_to_char
+                d['char_to_idx'] = data.char_to_idx
+                d['hidden_size'] = hidden_size
+                d['num_stacks'] = num_stacks
+                d['dropout'] = dropout
+
+                pickle.dump(d, fd)
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index a18f9de..1312776 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -107,27 +107,28 @@ const std::pair<Tensor, vector<Tensor>> Pooling::Backward(int flag,
   CHECK_EQ(grad.device()->lang(), kCpp);
   CHECK_EQ(grad.nDim(), 4u);
   vector<Tensor> param_grad;
-  CHECK(!buf_.empty());
-  Tensor mask = buf_.top();
-  buf_.pop();
-  size_t batchsize = grad.shape(0);
+    size_t batchsize = grad.shape(0);
   Shape shape{batchsize, channels_, height_, width_};
   auto dev = grad.device();
   DataType dtype = grad.data_type();
   Tensor dx(shape, dev, dtype);
   auto gradptr = grad.data<float>();
-  auto maskptr = mask.data<float>();
   float* dxptr = new float[dx.Size()];
-  if (pool_ == PoolingConf_PoolMethod_MAX)
+  if (pool_ == PoolingConf_PoolMethod_MAX) {
+    CHECK(!buf_.empty());
+    Tensor mask = buf_.top();
+    buf_.pop();
+    auto maskptr = mask.data<float>();
     BackwardMaxPooling(gradptr, maskptr, batchsize, channels_, height_, width_,
                        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                        stride_w_, dxptr);
-  else if (pool_ == PoolingConf_PoolMethod_AVE)
+  } else if (pool_ == PoolingConf_PoolMethod_AVE) {
     BackwardAvgPooling(gradptr, batchsize, channels_, height_, width_,
                        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                        stride_w_, dxptr);
-  else
+  } else {
     LOG(FATAL) << "Unknow pooling method";
+  }
 
   dx.CopyDataFromHostPtr(dxptr, dx.Size());
   delete[] dxptr;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index a1e948d..f6bca43 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -79,15 +79,14 @@ class Tensor(object):
             return
         else:
             assert isinstance(shape, tuple), 'shape should be tuple'
-            vs = list(shape)
             if device is None:
                 device = pydevice.get_default_device()
-                self.singa_tensor = singa.Tensor(vs, device, dtype)
+                self.singa_tensor = singa.Tensor(list(shape), device, dtype)
             else:
-                self.singa_tensor = singa.Tensor(vs, device, dtype)
-            self.shape = shape
-            self.device = device
-            self.dtype = dtype
+                self.singa_tensor = singa.Tensor(list(shape), device, dtype)
+        self.shape = shape
+        self.dtype = dtype
+        self.device = device
 
     def ndim(self):
         '''
@@ -136,6 +135,9 @@ class Tensor(object):
             t (Tensor)
         '''
         self.singa_tensor.ResetLike(t.singa_tensor)
+        self.shape = t.shape
+        self.device = t.device
+        self.dtype = t.dtype
 
     '''
     def as_type(self, dtype):
@@ -153,11 +155,13 @@ class Tensor(object):
             device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
         '''
         self.singa_tensor.ToDevice(device)
+        self.device = device
 
     def to_host(self):
         '''Move the tensor data onto the default host CppCPU device.
         '''
         self.singa_tensor.ToHost()
+        self.device = pydevice.default_device
 
     def l2(self):
         '''

[28/51] [abbrv] incubator-singa git commit: Fixed the bug leading to wired accuracy (nan), which was caused by forgeting to average the gradient over the whole mini-batch. That is why we need a lower learning rate and could not use momentum. Update the l

Posted by wa...@apache.org.

Fixed the bug leading to wired accuracy (nan), which was caused by forgeting
to average the gradient over the whole mini-batch. That is why we need a lower
learning rate and could not use momentum.
Update the lr in optimzier.py to time the multiplier
Fix the bug from mis-setting the pooling type of alexnet.py (max-->avg)


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d4539ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d4539ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d4539ee

Branch: refs/heads/master
Commit: 6d4539eed2ae200a3a904a70cb789fc1b39d0f38
Parents: 1db2784
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 13:13:19 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Mon Aug 15 20:16:30 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.cc   |  11 +-
 examples/cifar10/alexnet.py   |  13 +-
 examples/cifar10/train.py     |  19 ++-
 src/model/feed_forward_net.cc |   6 +-
 src/model/optimizer/sgd.cc    |   4 +-
 src/python/singa/__init__.py  | 240 -------------------------------------
 src/python/singa/layer.py     |  15 +--
 src/python/singa/net.py       |   8 +-
 src/python/singa/optimizer.py |  36 ++++--
 src/python/singa/tensor.py    |   8 +-
 10 files changed, 68 insertions(+), 292 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index e1363e4..8051d1b 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -134,7 +134,7 @@ FeedForwardNet CreateNet() {
   return net;
 }
 
-void Train(float lr, int num_epoch, string data_dir) {
+void Train(int num_epoch, string data_dir) {
   Cifar10 data(data_dir);
   Tensor train_x, train_y, test_x, test_y;
   {
@@ -161,11 +161,11 @@ void Train(float lr, int num_epoch, string data_dir) {
   auto net = CreateNet();
   SGD sgd;
   OptimizerConf opt_conf;
-  opt_conf.set_momentum(0.9);
+  // opt_conf.set_momentum(0.9);
   auto reg = opt_conf.mutable_regularizer();
   reg->set_coefficient(0.004);
   sgd.Setup(opt_conf);
-  sgd.SetLearningRateGenerator([lr](int step) {
+  sgd.SetLearningRateGenerator([](int step) {
     if (step <= 120)
       return 0.001;
     else if (step <= 130)
@@ -193,14 +193,11 @@ int main(int argc, char **argv) {
   int pos = singa::ArgPos(argc, argv, "-epoch");
   int nEpoch = 1;
   if (pos != -1) nEpoch = atoi(argv[pos + 1]);
-  pos = singa::ArgPos(argc, argv, "-lr");
-  float lr = 0.001;
-  if (pos != -1) lr = atof(argv[pos + 1]);
   pos = singa::ArgPos(argc, argv, "-data");
   string data = "cifar-10-batches-bin";
   if (pos != -1) data = argv[pos + 1];
 
   LOG(INFO) << "Start training";
-  singa::Train(lr, nEpoch, data);
+  singa::Train(nEpoch, data);
   LOG(INFO) << "End training";
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..dae129f 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,9 +20,6 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
 validation accuracy would be about 82%.
 '''
 
-import sys
-import os
-
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import initializer
@@ -39,18 +36,18 @@ def create_net(use_cpu=False):
     W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
     W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
-    b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
+    b_specs = {'init': 'constant', 'value': 0, 'lr_mult': 2, 'decay_mult': 0}
     net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
     net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
     net.add(layer.Activation('relu1'))
-    net.add(layer.LRN(name='lrn1'))
+    net.add(layer.LRN(name='lrn1', size=3, alpha=5e-5))
     net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
     net.add(layer.Activation('relu2'))
-    net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
-    net.add(layer.LRN('lrn2'))
+    net.add(layer.AvgPooling2D('pool2', 3, 2,  pad=1))
+    net.add(layer.LRN('lrn2', size=3, alpha=5e-5))
     net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
     net.add(layer.Activation('relu3'))
-    net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
+    net.add(layer.AvgPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
     net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
     for (p, specs) in zip(net.param_values(), net.param_specs()):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index de03750..2091ee5 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -22,7 +22,6 @@ includes 1 label & 3072 pixels.  3072 pixels are 3 channels of a 32x32 image
 import cPickle
 import numpy as np
 import os
-import sys
 import argparse
 
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
@@ -84,7 +83,7 @@ def normalize_for_alexnet(train_x, test_x):
 
 
 def vgg_lr(epoch):
-    return 0.01 / float(1 << ((epoch / 30)))
+    return 0.1 / float(1 << ((epoch / 25)))
 
 
 def alexnet_lr(epoch):
@@ -92,7 +91,7 @@ def alexnet_lr(epoch):
         return 0.001
     elif epoch < 130:
         return 0.0001
-    elif epoch < 140:
+    else:
         return 0.00001
 
 
@@ -107,8 +106,8 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
-    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
-    for (p, specs) in zip(net.param_values(), net.param_specs()):
+    opt = optimizer.SGD(momentum=0.9, decay=weight_decay)
+    for (p, specs) in zip(net.param_names(), net.param_specs()):
         opt.register(p, specs)
 
     tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
@@ -129,13 +128,13 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
             grads, (l, a) = net.train(tx, ty)
             loss += l
             acc += a
-            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
-                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
+            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
+                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s))
             # update progress bar
             utils.update_progress(b * 1.0 / num_train_batch,
                                   'training loss = %f, accuracy = %f' % (l, a))
-        info = '\ntraining loss = %f, training accuracy = %f' \
-            % (loss / num_train_batch, acc / num_train_batch)
+        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
+            % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
         print info
 
         loss, acc = 0.0, 0.0
@@ -167,7 +166,7 @@ if __name__ == '__main__':
     if args.model == 'alexnet':
         train_x, test_x = normalize_for_alexnet(train_x, test_x)
         net = alexnet.create_net(args.use_cpu)
-        train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004,
+        train((train_x, train_y, test_x, test_y), net, 160, alexnet_lr, 0.004,
               use_cpu=args.use_cpu)
     else:
         train_x, test_x = normalize_for_vgg(train_x, test_x)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/model/feed_forward_net.cc
----------------------------------------------------------------------
diff --git a/src/model/feed_forward_net.cc b/src/model/feed_forward_net.cc
index 514d6e2..3875430 100644
--- a/src/model/feed_forward_net.cc
+++ b/src/model/feed_forward_net.cc
@@ -206,8 +206,8 @@ const std::pair<float, float> FeedForwardNet::TrainOnBatch(int epoch,
 
 const Tensor FeedForwardNet::Forward(int flag, const Tensor& data) {
   Tensor input = data, output;
+  // LOG(INFO) << data.L1();
   for (auto layer : layers_) {
-    //    LOG(INFO) << layer->name() << ": " << input.L1();
     output = layer->Forward(flag, input);
     // LOG(INFO) << layer->name() << ": " << output.L2();
     input = output;
@@ -220,13 +220,13 @@ const vector<Tensor> FeedForwardNet::Backward(int flag, const Tensor& grad) {
   std::stack<Tensor> buf;
   Tensor tmp = grad;
   for (int i = layers_.size() - 1; i >= 0; i--) {
-    //   LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
+    // LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
     auto ret = layers_.at(i)->Backward(flag, tmp);
     tmp = ret.first;
     if (ret.second.size()) {
       for (int k = ret.second.size() - 1; k >= 0; k--) {
         buf.push(ret.second[k]);
-        //       LOG(INFO) <<  "      " << buf.top().L1();
+        // LOG(INFO) <<  "      " << buf.top().L1();
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/model/optimizer/sgd.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/sgd.cc b/src/model/optimizer/sgd.cc
index d78d5b8..ac453cd 100644
--- a/src/model/optimizer/sgd.cc
+++ b/src/model/optimizer/sgd.cc
@@ -33,6 +33,7 @@ void SGD::Setup(const OptimizerConf& conf) {
 // value = value - history
 void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
                 Tensor& value) {
+  // LOG(INFO) << "param " << name  << " lr = " << lr << " grad = " << grad.L1() << " value = " << value.L1();
   if (momentum_generator_) {
     float mom = momentum_generator_(step);
     if (mom != 0) {
@@ -46,9 +47,8 @@ void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
       value -= history;
       return;
     }
-  } else {
-    Axpy(-lr, grad, &value);
   }
+  Axpy(-lr, grad, &value);
 }
 }  // namespace singa
 #endif  // SRC_MODEL_OPTIMIZER_SGD_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index f14c8c5..e69de29 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,240 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-This script is the main entrance for user to run singa inside a model workspace
-
-To use this script, user sudo install these dependencies: flask pillow and protobuf
-'''
-
-import sys, glob, os, random, shutil, time
-from flask import Flask, request, redirect, url_for
-import numpy as np
-import ConfigParser
-import urllib, traceback
-
-
-from argparse import ArgumentParser
-from argparse import RawDescriptionHelpFormatter
-sys.path.append(os.getcwd())
-
-__all__ = []
-__version__ = 0.1
-__date__ = '2016-07-20'
-__updated__ = '2016-07-20'
-__shortdesc__ = '''
-welcome to singa
-'''
-
-app = Flask(__name__)
-config = ConfigParser.RawConfigParser()
-service = {}
-data_path = "data_"
-parameter_path = "parameter_"
-
-debug = False
-
-class CLIError(Exception):
-    '''Generic exception to raise and log different fatal errors.'''
-    def __init__(self, msg):
-        super(CLIError).__init__(type(self))
-        self.msg = "E: %s" % msg
-    def __str__(self):
-        return self.msg
-    def __unicode__(self):
-        return self.msg
-
-def main(argv=None): # IGNORE:C0111
-    '''Command line options.'''
-
-    from . import device
-
-    if argv is None:
-        argv = sys.argv
-    else:
-        sys.argv.extend(argv)
-
-    program_name = os.path.basename(sys.argv[0])
-    program_version = "v%s" % __version__
-    program_build_date = str(__updated__)
-    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
-    program_shortdesc = __shortdesc__
-    program_license = '''%s
-
-  Created by dbsystem group on %s.
-  Copyright 2016 NUS School of Computing. All rights reserved.
-
-  Licensed under the Apache License 2.0
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Distributed on an "AS IS" basis without warranties
-  or conditions of any kind, either express or implied.
-
-USAGE
-''' % (program_shortdesc, str(__date__))
-
-    global debug
-
-    try:
-        # Setup argument parser
-        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
-        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
-        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
-        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
-        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
-        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
-        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
-        parser.add_argument('-V', '--version', action='version', version=program_version_message)
-
-        # Process arguments
-        args = parser.parse_args()
-
-        port = args.port
-        parameter_file = args.parameter
-        mode = args.mode
-        need_reload = args.reload_data
-        use_cpu = args.use_cpu
-        debug = args.debug
-
-        #prepare data files
-        config.read('file.cfg')
-        file_prepare(need_reload)
-
-
-        import network as net
-        model = net.create()
-
-        #load parameter
-        parameter_file=get_parameter(parameter_file)
-
-        if parameter_file:
-            print "load parameter file: %s" % parameter_file
-            model.load(parameter_file)
-
-        if use_cpu:
-            raise CLIError("Currently cpu is not support!")
-        else:
-            print "runing with gpu"
-            d = device.create_cuda_gpu()
-
-        model.to_device(d)
-
-        if mode == "serve":
-            print "runing singa in serve mode, listen to  port: %s " % port
-            global service
-            from serve import Service
-            service =Service(model,d)
-
-            app.debug = debug
-            app.run(host='0.0.0.0', port= port)
-        elif mode == "train":
-            print "runing singa in train mode"
-            global trainer
-            from train import Trainer
-            trainer= Trainer(model,d)
-            if not parameter_file:
-                trainer.initialize()
-            trainer.train()
-        else:
-            raise CLIError("Currently only serve mode is surpported!")
-        return 0
-    except KeyboardInterrupt:
-        ### handle keyboard interrupt ###
-        return 0
-    except Exception, e:
-        if debug:
-            traceback.print_exc()
-            raise(e)
-        indent = len(program_name) * " "
-        sys.stderr.write(program_name + ": " + str(e) + "\n")
-        sys.stderr.write(indent + "  for help use --help \n\n")
-        return 2
-
-def file_prepare(reload_data=False):
-    '''
-        download all files and generate data.py
-    '''
-    if not reload_data and os.path.exists("data_.py"):
-        return
-
-    print "download file"
-    #clean data
-    shutil.rmtree("data_.py",ignore_errors=True)
-    shutil.rmtree("data_",ignore_errors=True)
-
-    data_py=open("data_.py",'w')
-    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
-    if config.has_section("data"):
-        file_list = config.items("data")
-        #download files
-        for f in file_list:
-            name,path=download_file(f[0],f[1],data_path)
-            data_py.write("%s=\"%s\"\n" % (name,path))
-
-    data_py.flush()
-    data_py.close()
-
-    if config.has_section("parameter"):
-        parameter_list = config.items("parameter")
-        for p in parameter_list:
-            download_file(p[0],p[1],parameter_path)
-
-def download_file(name,path,dest):
-    '''
-    download one file to dest
-    '''
-    if not os.path.exists(dest):
-        os.makedirs(dest)
-    if (path.startswith('http')):
-        file_name = path.split('/')[-1]
-        target = os.path.join(dest,file_name)
-        urllib.urlretrieve(path,target)
-    return name,target
-
-
-def get_parameter(file_name=None):
-    '''
-    get the paticular file name or get the last parameter file
-    '''
-    if not os.path.exists(parameter_path):
-        os.makedirs(parameter_path)
-        return
-
-    if file_name:
-	return os.path.join(parameter_path,file_name)
-
-    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
-    if len(parameter_list)==0:
-        return
-    parameter_list.sort()
-
-    return parameter_list[-1]
-
-@app.route("/")
-def index():
-    return "Hello SINGA User!"
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    if request.method == 'POST':
-        try:
-            response=service.serve(request)
-        except Exception as e:
-            return e
-        return response
-    return "error, should be post request"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..1e9caeb 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -362,8 +362,8 @@ class BatchNormalization(Layer):
 
 
 class LRN(Layer):
-    def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
-                 k=1, input_sample_shape=None):
+    def __init__(self, name, size=5, alpha=1e-4, beta=0.75,
+                 mode='cross_channel', k=1, input_sample_shape=None):
         """Local response normalization.
 
         Args:
@@ -391,7 +391,7 @@ class Dense(Layer):
 
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
-                 W_transpose=True, input_sample_shape=None):
+                 W_transpose=False, input_sample_shape=None):
         """Apply linear/affine transformation, also called inner-product or
         fully connected layer.
 
@@ -424,10 +424,10 @@ class Dense(Layer):
             W_specs['name'] = name + '_weight'
         if 'name' not in b_specs:
             b_specs['name'] = name + '_bias'
-        self.conf.param.extend([_construct_param_specs_from_dict(W_specs)])
-        self.param_specs.append(_construct_param_specs_from_dict(W_specs))
-        self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
-        self.param_specs.append(_construct_param_specs_from_dict(b_specs))
+        wspecs = _construct_param_specs_from_dict(W_specs)
+        bspecs = _construct_param_specs_from_dict(b_specs)
+        self.conf.param.extend([wspecs, bspecs])
+        self.param_specs.extend([wspecs, bspecs])
         # dense layer is transparent to engine.
         self.layer = _create_layer('singa', 'Dense')
         if input_sample_shape is not None:
@@ -712,6 +712,7 @@ def _construct_param_specs_from_dict(specs):
         a ParamSpec object
     """
     conf = model_pb2.ParamSpec()
+    print 'convert', specs
     if 'name' in specs:
         conf.name = specs['name']
     if 'lr_mult' in specs:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index f040378..3a1732c 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -95,16 +95,22 @@ class FeedForwardNet(object):
         # print x.l1()
         for lyr in self.layers:
             x = lyr.forward(flag, x)
-        #    print lyr.name, x.l1()
+            # print lyr.name, x.l1()
         return x
 
     def backward(self):
         grad = self.loss.backward()
+        if len(grad.shape) > 1:
+            grad /= grad.shape[0]  # average across the batch
+        # print 'grad', grad.l1()
         pgrads = []
         for lyr in reversed(self.layers):
             grad, _pgrads = lyr.backward(kTrain, grad)
+            # disp = '%f ' % grad.l1()
             for g in reversed(_pgrads):
                 pgrads.append(g)
+                # disp = disp + ', %f ' % g.l1()
+            # print disp
         return reversed(pgrads)
 
     def save(self, f):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index aa6bdd1..32f03d4 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -102,16 +102,19 @@ class Optimizer(object):
             name (str): parameter name
             specs (ParamSpec): protobuf obj
         """
-	assert type(specs) == model_pb2.ParamSpec, \
-		'specs should be model_pb2.ParamSpec instance'
+        assert type(specs) == model_pb2.ParamSpec, \
+            'specs should be model_pb2.ParamSpec instance'
         if specs.HasField('regularizer'):
             self.regularizers[name] = CppRegularizer(specs.regularizer)
+        elif specs.decay_mult != 1:
+            self.regularizers[name] = L2Regularizer(
+                specs.decay_mult * self.regularizer.coefficient)
+
         if specs.HasField('constraint'):
             self.constraints[name] = CppConstraint(specs.constraint)
+
         if specs.lr_mult != 1:
             self.learning_rate_multiplier[name] = specs.lr_mult
-        if specs.decay_mult != 1:
-            self.decay_multiplier[name] = specs.decay_mult
 
     def apply_regularizer_constraint(self, value, grad, name=None, step=None):
         """Apply regularization and constraint if available.
@@ -129,12 +132,12 @@ class Optimizer(object):
             the updated gradient Tensor
         """
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, step)
+            self.constraints[name].apply(step, value, grad)
         elif self.constraint is not None:
             self.constraint.apply(step, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, step)
+            self.regularizers[name].apply(step, value, grad)
         elif self.regularizer is not None:
             self.regularizer.apply(step, value, grad)
         return grad
@@ -175,24 +178,29 @@ class Optimizer(object):
         assert self.lr_gen is not None, 'Learning rate generator is not set.'\
             'Either set the lr_gen in constructor or call apply_with_lr'
         lr = self.lr_gen(step)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         return self.apply_with_lr(step, lr, grad, value, name)
 
 
 class SGD(Optimizer):
 
-    def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
+    def __init__(self, lr=None, momentum=None, decay=None):
         """The vallina Stochasitc Gradient Descent algorithm.
 
         See the base Optimizer for all arguments.
         """
         super(SGD, self).__init__(lr, momentum, decay)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        if momentum is not None:
+            conf.momentum = momentum
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
 
     def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
+        self.apply_regularizer_constraint(value, grad, name, step)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
@@ -206,6 +214,8 @@ class Nesterov(Optimizer):
         """
         super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
         conf = model_pb2.OptimizerConf()
+        if momentum is not None:
+            conf.momentum = momentum
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
 
@@ -232,6 +242,8 @@ class AdaGrad(Optimizer):
 
     def apply_with_lr(self, step, lr, grad, value, name):
         grad = self.apply_regularizer_constraint(step, value, grad, name)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
@@ -255,6 +267,8 @@ class RMSProp(Optimizer):
 
     def apply_with_lr(self, step, lr, grad, value, name):
         grad = self.apply_regularizer_constraint(step, value, grad, name)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
@@ -300,7 +314,9 @@ class L2Regularizer(Regularizer):
         if coefficient is None:
             assert self.coefficient is not None, 'Must set the coefficient'
             coefficient = self.coefficient
-        tensor.axpy(coefficient, value, grad)
+        # print coefficient, value.l1(), grad.l1()
+        if coefficient != 0:
+            tensor.axpy(coefficient, value, grad)
         return grad
 
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index ed651e9..1d04cdf 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -177,28 +177,28 @@ class Tensor(object):
         if isinstance(x, Tensor):
             self.singa_tensor += x.singa_tensor
         else:
-            self.singa_tensor += x
+            self.singa_tensor += float(x)
         return self
 
     def __isub__(self, x):
         if isinstance(x, Tensor):
             self.singa_tensor -= x.singa_tensor
         else:
-            self.singa_tensor -= x
+            self.singa_tensor -= float(x)
         return self
 
     def __imul__(self, x):
         if isinstance(x, Tensor):
             self.singa_tensor *= x.singa_tensor
         else:
-            self.singa_tensor *= x
+            self.singa_tensor *= float(x)
         return self
 
     def __idiv__(self, x):
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
-            self.singa_tensor /= x
+            self.singa_tensor /= float(x)
         return self
 
     '''

[18/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

1. copy the 'examples' folder into docs/ to generate htmls files using the README.md files
2. add software_stack.md to describe the major data structures of v1.0
3. add device.rst to introduce the Device APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bc822cd2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bc822cd2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bc822cd2

Branch: refs/heads/master
Commit: bc822cd208655cf7fd9aea5931e2618f4e2bbe45
Parents: 1db2784
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 11 23:13:12 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 23:44:56 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                      |   1 +
 doc/_static/images/singav1-sw.png | Bin 0 -> 24326 bytes
 doc/conf.py                       |   9 +--
 doc/docs.rst                      |   6 +-
 doc/docs/device.rst               |  47 ++++++++++++++++
 doc/docs/examples.rst             |   6 ++
 doc/docs/index.rst                |  15 ++---
 doc/docs/jp/index.md              |  23 --------
 doc/docs/kr/index.md              |  23 --------
 doc/docs/software_stack.md        |  99 +++++++++++++++++++++++++++++++++
 doc/docs/tensor.md                |   7 +++
 doc/docs/zh/index.md              |  10 ++--
 12 files changed, 177 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index 62a2236..c6eddf1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -50,6 +50,7 @@ clean:
 
 .PHONY: html
 html:
+	cp -rf ../examples docs/
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/_static/images/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singav1-sw.png b/doc/_static/images/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/_static/images/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..20ba51a 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,9 +16,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, '../src/python/singa/')
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,7 +36,7 @@ source_parsers = {
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-   
+'sphinx.ext.autodoc'
 ]
 
 # Add any paths that contain templates here, relative to this directory.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
index 2ebea60..400b12a 100644
--- a/doc/docs.rst
+++ b/doc/docs.rst
@@ -2,7 +2,5 @@ Documentation
 =============
 
 .. toctree::
-	docs/index
- 	docs/zh/index
-	docs/jp/index
-	docs/kr/index
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
new file mode 100644
index 0000000..aa5defb
--- /dev/null
+++ b/doc/docs/device.rst
@@ -0,0 +1,47 @@
+Device
+=======
+
+
+The Device abstract represent a hardware device with memory and compuation units.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implmentations,
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Create devices
+---------------
+
+Python API
+~~~~~~~~~~
+
+.. autofunction:: device.create_cuda_gpus
+
+.. autofunction:: device.create_cuda_gpus_on
+
+.. autofunction:: device.create_cuda_gpu_on
+
+.. autofunction:: device.get_default_device
+
+
+The following code shows how to create devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
+
+
+
+CPP API
+~~~~~~~
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
new file mode 100644
index 0000000..b0b2af8
--- /dev/null
+++ b/doc/docs/examples.rst
@@ -0,0 +1,6 @@
+Examples
+========
+
+.. toctree::
+
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index a6a1b49..8a74976 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -2,15 +2,8 @@ English
 =======
 
 .. toctree::
-	overview
-        installation
-	quick-start
-        programming-guide
-        distributed-training
-        data
-        checkpoint
-        python
-        test
-        gpu
-        examples
 
+   installation
+   software_stack
+   device
+   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/jp/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/jp/index.md b/doc/docs/jp/index.md
deleted file mode 100644
index 6679198..0000000
--- a/doc/docs/jp/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# \u6700\u65b0\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8
-
----
-
-* [\u30a4\u30f3\u30c8\u30ed\u30c0\u30af\u30b7\u30e7\u30f3](overview.html)
-* [\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb](installation.html)
-* [\u30af\u30a4\u30c3\u30af\u30b9\u30bf\u30fc\u30c8](quick-start.html)
-* [\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0 \u30ac\u30a4\u30c9](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [\u5206\u6563 \u30c8\u30ec\u30fc\u30cb\u30f3\u30b0](distributed-training.html)
-* [\u30c7\u30fc\u30bf\u306e\u6e96\u5099](data.html)
-* [Checkpoint \u3068 Resume](checkpoint.html)
-* [\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u30c6\u30b9\u30c8 \u3068 \u7279\u5fb4\u62bd\u51fa](test.html)
-* [\u30b5\u30f3\u30d7\u30eb](examples.html)
-    * Feed-forward \u30e2\u30c7\u30eb
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/kr/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/kr/index.md b/doc/docs/kr/index.md
deleted file mode 100644
index 990d5d9..0000000
--- a/doc/docs/kr/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# \ucd5c\uc2e0 \ubb38\uc11c
-
----
-
-* [\uac1c\uc694](overview.html)
-* [\uc778\uc2a4\ud1a8](installation.html)
-* [\ud035 \uc2a4\ud0c0\ud2b8](quick-start.html)
-* [\ud504\ub85c\uadf8\ub798\ubc0d \uac00\uc774\ub4dc](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [\ubd84\uc0b0 \ud2b8\ub808\uc774\ub2dd](distributed-training.html)
-* [\ub370\uc774\ud130 \uc900\ube44](data.html)
-* [Checkpoint \uc640 Resume](checkpoint.html)
-* [\uc131\ub2a5\ud14c\uc2a4\ud2b8 \ubc0f \ud2b9\uc9d5\ucd94\ucd9c](test.html)
-* [\uc0d8\ud50c](examples.html)
-    * Feed-forward \ubaa8\ub378
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; The
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, optimizers/initializer/metric/loss for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
+tensor operations on its execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA have implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents normal GPU card from both Nvidia and AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for tensor operations,
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users would create a device instance and pass it to create multiple
+tensor instances. When users call the Tensor functions, these function would invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
+including deep learning models and other traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware device
+using different programming languages. A new hardware device would be supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory could be implemented by Device, which
+manages both operation execution and memory malloc/free. More optimization details
+would be described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules.
+
+* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t the training loss functions.
+  They wraps the complex layer operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide variant methods of initializing
+  model parameters (stored in Tensor instances), following Uniform, Gaussian, etc.
+
+* [Loss](loss.html) and its subclasses defines the training objective loss functions.
+  Both functions of computing the loss values and computing the gradient of the prediction w.r.t the
+  objective loss are implemented. Example loss functions include squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide the function to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp etc.
+
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing.
+
+* Reader and its subclasses load string records from disk files
+* Writer and its subclasses write string records to disk files
+* Encoder and its subclasses encode Tensor instances into string records
+* Decoder and its subclasses decodes string records into Tensor instances
+* Endpoint represents a communication endpoint which provides functions for passing messages to each other.
+* Message represents communication messages between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
new file mode 100644
index 0000000..eaf8362
--- /dev/null
+++ b/doc/docs/tensor.md
@@ -0,0 +1,7 @@
+# Tensor
+
+
+##
+
+
+##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
index c44a2cf..4b49d5f 100644
--- a/doc/docs/zh/index.md
+++ b/doc/docs/zh/index.md
@@ -1,7 +1,9 @@
 SINGA \u4e2d\u6587\u6587\u6863
----
+==============
 
-* [\u7b80\u4ecb](overview.html)
-* [\u5b89\u88c5](installation_source.html)
-* [\u4f7f\u7528\u6307\u5357](programming-guide.html)
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide

[06/51] [abbrv] incubator-singa git commit: SINGA-217 build python package with setup.py

Posted by wa...@apache.org.

SINGA-217 build python package with setup.py

CudnnRNN requires cudnn version later than 5.05, the previous check
based on CUDNN_VERSION_MAJOR and CUDNN_VERSION_PATCH does not work for
version 5.13. Therefore, we replaced the check to CUDNN_VERSION >= 5005.
CUDNN_VERSION is defined in cudnn.h.

cmake defines CUDNN_VERSION_SWIG which is used by model_layer.i for cudnn
rnn.

Remvoe the hardcoded sys imports for python files. Now users need to
install the python module to run python examples and tests.

Tested installation in python virtual enviornment and conda
virtural enviorment:

Inside build/python
```
pip install -e .
```


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6b2ff3c9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6b2ff3c9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6b2ff3c9

Branch: refs/heads/master
Commit: 6b2ff3c98c2a82aa7bc3db245393c3a55f0449cd
Parents: bf81f25
Author: Wei Wang <wa...@gmail.com>
Authored: Thu Aug 11 22:20:45 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Thu Aug 11 23:00:48 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                     |   2 +-
 cmake/Templates/singa_config.h.in  |   1 +
 cmake/Thirdparty/FindCUDNN.cmake   |   2 +-
 examples/char-rnn/sample.py        |   2 +-
 examples/char-rnn/train.py         |   2 +-
 examples/cifar10/alexnet.py        |   2 +-
 examples/cifar10/predict.py        |   2 +-
 examples/cifar10/train.py          |   2 +-
 examples/cifar10/vgg.py            |   2 +-
 src/io/csv_decoder.cc              |   4 +-
 src/model/layer/cudnn_dropout.cc   |   6 +-
 src/model/layer/cudnn_dropout.h    |   5 +-
 src/model/layer/cudnn_rnn.cc       |   4 +-
 src/model/layer/cudnn_rnn.h        |   5 +-
 src/python/singa/__init__.py       |  25 +++--
 src/python/swig/config.i.in        |   7 +-
 src/python/swig/model_layer.i      |   4 +-
 test/python/example_test_device.py |  36 -------
 test/python/example_test_tensor.py | 179 --------------------------------
 test/python/test_tensor.py         |   2 +
 test/singa/test_cudnn_dropout.cc   |   4 +-
 test/singa/test_cudnn_rnn.cc       |   4 +-
 22 files changed, 45 insertions(+), 257 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9c4f326..257fb14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@ INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
 
 OPTION(USE_CBLAS "Use CBlas libs" ON)
 OPTION(USE_CUDA "Use Cuda libs" OFF)
-OPTION(USE_CUDNN "Use Cudnn libs" OFF)
+OPTION(USE_CUDNN "Use Cudnn libs" ON)
 OPTION(USE_OPENCV "Use opencv" OFF)
 OPTION(USE_LMDB "Use LMDB libs" OFF)
 OPTION(USE_PYTHON "Generate py wrappers" ON)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/cmake/Templates/singa_config.h.in
----------------------------------------------------------------------
diff --git a/cmake/Templates/singa_config.h.in b/cmake/Templates/singa_config.h.in
index d03d58b..0211f09 100644
--- a/cmake/Templates/singa_config.h.in
+++ b/cmake/Templates/singa_config.h.in
@@ -16,6 +16,7 @@
 #cmakedefine CUDNN_VERSION_MAJOR @CUDNN_VERSION_MAJOR@
 #cmakedefine CUDNN_VERSION_MINOR @CUDNN_VERSION_MINOR@
 #cmakedefine CUDNN_VERSION_PATCH @CUDNN_VERSION_PATCH@
+#cmakedefine CUDNN_VERSION_SWIG @CUDNN_VERSION_SWIG@
 
 #cmakedefine USE_OPENCL
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/cmake/Thirdparty/FindCUDNN.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindCUDNN.cmake b/cmake/Thirdparty/FindCUDNN.cmake
index cefc4fe..fbc103c 100644
--- a/cmake/Thirdparty/FindCUDNN.cmake
+++ b/cmake/Thirdparty/FindCUDNN.cmake
@@ -25,7 +25,7 @@ IF(CUDNN_FOUND)
     IF(NOT CUDNN_VERSION_MAJOR)
         SET(CUDNN_VERSION "???")
     ELSE()
-        SET(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
+      MATH(EXPR CUDNN_VERSION_SWIG "${CUDNN_VERSION_MAJOR} * 1000 + ${CUDNN_VERSION_MINOR} * 100 + ${CUDNN_VERSION_PATCH}")
     ENDIF()
     MESSAGE(STATUS "Found Cudnn_v${CUDNN_VERSION} at ${CUDNN_INCLUDE_DIR} ${CUDNN_LIBRARIES}")
     MARK_AS_ADVANCED(CUDNN_INCLUDE_DIR CUDNN_LIBRARIES)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/examples/char-rnn/sample.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/sample.py b/examples/char-rnn/sample.py
index a8fcb73..8147732 100644
--- a/examples/char-rnn/sample.py
+++ b/examples/char-rnn/sample.py
@@ -21,7 +21,7 @@ import cPickle as pickle
 import numpy as np
 import argparse
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import tensor
 from singa import device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 3dfa0d9..fb5e71f 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -25,7 +25,7 @@ import cPickle as pickle
 import numpy as np
 import argparse
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import loss
 from singa import device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 9ed5599..ddad1d5 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -23,7 +23,7 @@ validation accuracy would be about 82%.
 import sys
 import os
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import initializer
 from singa import metric

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index 07b1145..8a9ea4e 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -19,7 +19,7 @@ import numpy as np
 import sys
 import os
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import device
 from singa import tensor

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 3285651..20ce5a7 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -25,7 +25,7 @@ import os
 import sys
 import argparse
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import utils
 from singa import optimizer
 from singa import device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 97e690c..327592f 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -24,7 +24,7 @@ import sys
 import os
 import math
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import layer
 from singa import initializer

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/io/csv_decoder.cc
----------------------------------------------------------------------
diff --git a/src/io/csv_decoder.cc b/src/io/csv_decoder.cc
index 0c11028..72b4e10 100644
--- a/src/io/csv_decoder.cc
+++ b/src/io/csv_decoder.cc
@@ -33,7 +33,7 @@ std::vector<Tensor> CSVDecoder::Decode(std::string value) {
     ss >> l;
   std::string str;
   float d[kMaxCSVBufSize];
-  size_t size = 0;
+  int size = 0;
   while (std::getline(ss, str, ',')) {
     float temp;
     if (std::stringstream(str) >> temp) {
@@ -42,7 +42,7 @@ std::vector<Tensor> CSVDecoder::Decode(std::string value) {
     }
   }
 
-  Tensor data(Shape {size}, kFloat32);
+  Tensor data(Shape {static_cast<size_t>(size)}, kFloat32);
   data.CopyDataFromHostPtr(d, size);
   output.push_back(data);
   if (has_label_ == true) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/model/layer/cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_dropout.cc b/src/model/layer/cudnn_dropout.cc
index c5b62cf..e05a425 100644
--- a/src/model/layer/cudnn_dropout.cc
+++ b/src/model/layer/cudnn_dropout.cc
@@ -17,10 +17,10 @@
  */
 #include "./cudnn_dropout.h"
 #ifdef USE_CUDNN
+#include <cudnn.h>
 // cudnn dropout is added in cudnn 5
-#if CUDNN_VERSION_MAJOR >= 5
+#if CUDNN_MAJOR >= 5
 
-#include <cudnn.h>
 #include <chrono>
 
 #include "./cudnn_utils.h"
@@ -112,5 +112,5 @@ void CudnnDropout::ToDevice(std::shared_ptr<Device> device) {
   state_.ToDevice(device);
 }
 }  // namespace singa
-#endif  // CUDNN_VERSION_MAJOR>=5
+#endif  // CUDNN_MAJOR>=5
 #endif  // USE_CUDNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/model/layer/cudnn_dropout.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_dropout.h b/src/model/layer/cudnn_dropout.h
index 1241911..c6fc1c9 100644
--- a/src/model/layer/cudnn_dropout.h
+++ b/src/model/layer/cudnn_dropout.h
@@ -20,8 +20,9 @@
 #define SRC_MODEL_LAYER_CUDNN_DROPOUT_H_
 #include "singa/singa_config.h"
 #ifdef USE_CUDNN
+#include <cudnn.h>
 // cudnn dropout is added in cudnn 5
-#if CUDNN_VERSION_MAJOR >= 5
+#if CUDNN_MAJOR >= 5
 #include "./dropout.h"
 
 #include <cudnn.h>
@@ -56,6 +57,6 @@ class CudnnDropout : public Dropout {
   Tensor state_;
 };
 }  // namespace
-#endif  // CUDNN_VERSION_MAJOR>=5
+#endif  // CUDNN_MAJOR>=5
 #endif  // USE_CUDNN
 #endif  // SRC_MODEL_LAYER_CUDNN_DROPOUT_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/model/layer/cudnn_rnn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_rnn.cc b/src/model/layer/cudnn_rnn.cc
index 9961df2..0788801 100644
--- a/src/model/layer/cudnn_rnn.cc
+++ b/src/model/layer/cudnn_rnn.cc
@@ -17,8 +17,8 @@
  */
 #include "./cudnn_rnn.h"
 #ifdef USE_CUDNN
-#if CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
 #include <cudnn.h>
+#if CUDNN_VERSION >= 5005
 #include <chrono>
 #include "./cudnn_utils.h"
 #include "singa/utils/logging.h"
@@ -423,5 +423,5 @@ const std::pair<vector<Tensor>, vector<Tensor>> CudnnRNN::Backward(
 }
 
 }  // namespace singa
-#endif  // CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#endif  // CUDNN_VERSION >= 5005
 #endif  // USE_CUDNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/model/layer/cudnn_rnn.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_rnn.h b/src/model/layer/cudnn_rnn.h
index 82c68b0..5e642e0 100644
--- a/src/model/layer/cudnn_rnn.h
+++ b/src/model/layer/cudnn_rnn.h
@@ -20,7 +20,8 @@
 #define SRC_MODEL_LAYER_CUDNN_RNN_H_
 #include "singa/singa_config.h"
 #ifdef USE_CUDNN
-#if CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#include <cudnn.h>
+#if CUDNN_VERSION >= 5005
 #include <string>
 #include <utility>
 #include <vector>
@@ -82,6 +83,6 @@ class CudnnRNN : public RNN {
 
 }  // namespace singa
 
-#endif  // CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#endif  // CUDNN_VERSION >= 5005
 #endif  // USE_CUDNN
 #endif  // SRC_MODEL_LAYER_CUDNN_RNN_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index ec26906..f14c8c5 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -19,14 +19,13 @@
 '''
 This script is the main entrance for user to run singa inside a model workspace
 
-To use this script, user sudo install these dependencies: flask pillow and protobuf 
+To use this script, user sudo install these dependencies: flask pillow and protobuf
 '''
 
 import sys, glob, os, random, shutil, time
 from flask import Flask, request, redirect, url_for
-from PIL import Image
 import numpy as np
-import ConfigParser 
+import ConfigParser
 import urllib, traceback
 
 
@@ -43,7 +42,7 @@ welcome to singa
 '''
 
 app = Flask(__name__)
-config = ConfigParser.RawConfigParser()    
+config = ConfigParser.RawConfigParser()
 service = {}
 data_path = "data_"
 parameter_path = "parameter_"
@@ -126,7 +125,7 @@ USAGE
         if parameter_file:
             print "load parameter file: %s" % parameter_file
             model.load(parameter_file)
-        
+
         if use_cpu:
             raise CLIError("Currently cpu is not support!")
         else:
@@ -141,12 +140,12 @@ USAGE
             from serve import Service
             service =Service(model,d)
 
-            app.debug = debug 
+            app.debug = debug
             app.run(host='0.0.0.0', port= port)
         elif mode == "train":
             print "runing singa in train mode"
-            global trainer 
-            from train import Trainer 
+            global trainer
+            from train import Trainer
             trainer= Trainer(model,d)
             if not parameter_file:
                 trainer.initialize()
@@ -178,18 +177,18 @@ def file_prepare(reload_data=False):
     shutil.rmtree("data_.py",ignore_errors=True)
     shutil.rmtree("data_",ignore_errors=True)
 
-    data_py=open("data_.py",'w') 
+    data_py=open("data_.py",'w')
     data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
     if config.has_section("data"):
         file_list = config.items("data")
         #download files
         for f in file_list:
             name,path=download_file(f[0],f[1],data_path)
-            data_py.write("%s=\"%s\"\n" % (name,path)) 
+            data_py.write("%s=\"%s\"\n" % (name,path))
 
     data_py.flush()
     data_py.close()
-    
+
     if config.has_section("parameter"):
         parameter_list = config.items("parameter")
         for p in parameter_list:
@@ -214,7 +213,7 @@ def get_parameter(file_name=None):
     '''
     if not os.path.exists(parameter_path):
         os.makedirs(parameter_path)
-        return 
+        return
 
     if file_name:
 	return os.path.join(parameter_path,file_name)
@@ -225,7 +224,7 @@ def get_parameter(file_name=None):
     parameter_list.sort()
 
     return parameter_list[-1]
-	
+
 @app.route("/")
 def index():
     return "Hello SINGA User!"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/python/swig/config.i.in
----------------------------------------------------------------------
diff --git a/src/python/swig/config.i.in b/src/python/swig/config.i.in
index ed386b2..5743ba3 100644
--- a/src/python/swig/config.i.in
+++ b/src/python/swig/config.i.in
@@ -1,5 +1,4 @@
 // Pass in cmake configurations to swig
-#cmakedefine01 USE_CUDA 
-#cmakedefine01 USE_CUDNN 
-#cmakedefine CUDNN_VERSION_MAJOR ${CUDNN_VERSION_MAJOR}
-#cmakedefine CUDNN_VERSION_PATCH ${CUDNN_VERSION_PATCH} 
+#cmakedefine01 USE_CUDA
+#cmakedefine01 USE_CUDNN
+#cmakedefine CUDNN_VERSION_SWIG ${CUDNN_VERSION_SWIG}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/src/python/swig/model_layer.i
----------------------------------------------------------------------
diff --git a/src/python/swig/model_layer.i b/src/python/swig/model_layer.i
index f82b0f1..ae651d5 100644
--- a/src/python/swig/model_layer.i
+++ b/src/python/swig/model_layer.i
@@ -84,7 +84,7 @@ class RNN : public Layer {
 };
 
 #if USE_CUDA && USE_CUDNN
-#if CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#if CUDNN_VERSION_SWIG >= 5005
 class CudnnRNN : public RNN {
  public:
  // note: Must use std::vector instead of vector.
@@ -96,7 +96,7 @@ class CudnnRNN : public RNN {
     const std::vector<size_t> GetOutputSampleShape() const override;
 };
 
-#endif  // CUDNN_VERSION_MINOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#endif  // CUDNN_VERSION_SWIG >= 5005
 #endif  // USE_CUDA && USE_CUDNN
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/test/python/example_test_device.py
----------------------------------------------------------------------
diff --git a/test/python/example_test_device.py b/test/python/example_test_device.py
deleted file mode 100644
index c545ad1..0000000
--- a/test/python/example_test_device.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import sys, os
-
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '../../src/python'))
-from device import *
-from tensor import *
-
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '../../build/src'))
-from core_pb2 import *
-
-#---------------------------------------------------------
-# example usage
-#---------------------------------------------------------
-
-d1 = CudaGPU(123)
-print d1.singa_device
-print d1.get_host()
-print d1.get_id()
-print
-
-d2 = CppCPU(345)
-print d2.singa_device
-print d2.get_host()
-print d2.get_id()
-print
-
-s = (2, 3)
-t = Tensor(s, d2.get_host())
-print t.singa_tensor
-print t.device
-print
-
-d = Device(0)
-print d.singa_device
-print

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/test/python/example_test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/example_test_tensor.py b/test/python/example_test_tensor.py
deleted file mode 100644
index c5ce5f1..0000000
--- a/test/python/example_test_tensor.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import sys, os
-import numpy as np
-
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '../../build/python'))
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '../../build/lib'))
-sys.path.append(os.path.join(os.path.dirname(__file__),'../../build/src'))
-from core_pb2 import *
-from singa.tensor import *
-
-
-#---------------------------------------------------------
-# example usage
-#---------------------------------------------------------
-
-print '----------------------------'
-print 'global SizeOf kFloat32:', sizeof(kFloat32)
-print 'global SizeOf kFloat16:', sizeof(kFloat16)
-print 'global SizeOf kInt:', sizeof(kInt)
-print 'global SizeOf kDouble:', sizeof(kDouble)
-print
-
-a = Tensor()
-print 'a = Tensor()'
-print 'only defaultdevice is assigned \n'
-
-shape = (1, 6)
-t = Tensor(shape)
-print 'shape = (1, 6):', t.shape()
-print 'shape(0), shape(1):', t.shape(0), t.shape(1)
-print 'global Product:', product(shape)
-print 't = Tensor(shape)'
-#t.singa_tensor.AsType(kInt)
-print 'data_type():', t.data_type()
-print 'transpose', t.is_transpose()
-print 'nDim:', t.ndim()
-print 'size:', t.size()
-print 'memsize:', t.memsize()
-print 'data():', t.to_numpy()
-print
-
-print '----------------------------'
-print 't.data\n', t.to_numpy()
-q = t.copy()
-w = t.deepcopy()
-print 'q.data\n', q.to_numpy()
-print 'w.data\n', w.to_numpy()
-t += 1.23
-print 'q.data\n', q.to_numpy()
-print 'w.data\n', w.to_numpy()
-
-print '----------------------------'
-shape = (2, 3)
-t.reshape(shape)
-print 'shape = (3, 2)'
-print 'after reshape, t.shape():', t.shape()
-print 't.data(): \n', t.to_numpy()
-shape = (3, 2)
-t0 = reshape(t, shape)
-print 'shape = (2, 3)'
-print 'after t0 = reshape(t, shape) \n'
-print 't.shape():', t.shape()
-print 't0.shape():', t0.shape()
-print
-
-print '----------------------------'
-t += 1.2345
-print 't += 1.234, i.e., t.__iadd__(1.2345): \n', t.to_numpy()
-print
-
-t1 = t
-print 'copy\nt1 = t'
-print 't1.shape():', t1.shape()
-print 't1.data(): \n', t1.to_numpy()
-print
-
-r = t1.transpose()
-print 'r = t1.transpose()\nr.data() \n', r.to_numpy()
-
-
-print '----------------------------'
-t2 = log(t1)
-print 't2 = log(t1): \n', t2.to_numpy()
-print
-
-t1 += t2
-print 't1 += t2, i.e., t1.__iadd__(t2): \n', t1.to_numpy()
-print
-
-t1 *= 2
-print 't1 *= 2, i.e., t1.__imul__(2): \n', t1.to_numpy()
-print
-
-print '----------------------------'
-tc = t2.clone()
-print 'clone\ntc = t2.clone()\ntc.data(): \n', tc.to_numpy()
-print
-
-print 'sum(tc) \n', sum(tc)
-print
-t3 = sum(tc,0)
-print 'sum(tc,0) \n', t3.to_numpy()
-t3 = sum(tc,1)
-print 'sum(tc,1) \n', t3.to_numpy()
-print
-
-t3 = average(tc,0)
-print 'average(tc,0) \n', t3.to_numpy()
-t3 = average(tc,1)
-print 'average(tc,1) \n', t3.to_numpy()
-print
-
-t3 = softmax(tc,0)
-print 'softmax(tc,0)\n', t3.to_numpy()
-t3 = softmax(tc,1)
-print 'softmax(tc,1)\n', t3.to_numpy()
-
-print '----------------------------'
-print 't1 \n', t1.to_numpy()
-print
-
-n = t1 + t2
-print 't1 + t2: \n', n.to_numpy()
-print
-
-n = t1 * t2
-print 't1*t2: \n', n.to_numpy()
-print
-
-n = t1 - 1.2
-print 't1 - 1.2 \n', n.to_numpy()
-print
-
-n = add(t1, t1)
-print 'add(t1, t1) \n', n.to_numpy()
-print
-
-n = add(t1, 3.4)
-print 'add(t1, 3.4) \n', n.to_numpy()
-print
-
-n = div(t1, 2.0)
-print 'div(t1, 2.0) \n', n.to_numpy()
-print
-
-print '----------------------------'
-shape = (2, 2)
-t4 = Tensor(shape)
-t4 += 3.45
-print 't4 += 3.45 \n', t4.to_numpy()
-print
-
-n = t4 < 3.45
-print 't4 < 3.45 \n', n.to_numpy()
-print
-
-n = lt(t4, 3.45)
-print 'lt(t4, 3.45) \n', n.to_numpy()
-print
-
-n = ge(t4, 3.45)
-print 'ge(t4, 3.45) \n', n.to_numpy()
-print
-
-print '----------------------------'
-print t1.to_numpy()
-print tc.to_numpy()
-print
-copy_data_to_from(t1, tc, 2)
-print t1.to_numpy()
-print tc.to_numpy()
-
-#ttt = t1.singa_tensor < 5.2
-#ttt = lessthan(t1, 5.2)
-#print ttt.data()
-
-#devCPU = singa.CppCPU(1)
-#devGPU = singa.CudaGPU(2)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/test/python/test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
index 4d8b940..2374adc 100644
--- a/test/python/test_tensor.py
+++ b/test/python/test_tensor.py
@@ -34,6 +34,8 @@ class TestTensorMethods(unittest.TestCase):
         self.shape = (2, 3)
         self.t = tensor.Tensor(self.shape)
         self.s = tensor.Tensor(self.shape)
+        self.t.set_value(0)
+        self.s.set_value(0)
 
     def test_tensor_fields(self):
         t = self.t

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/test/singa/test_cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_dropout.cc b/test/singa/test_cudnn_dropout.cc
index 4a89235..f1b8437 100644
--- a/test/singa/test_cudnn_dropout.cc
+++ b/test/singa/test_cudnn_dropout.cc
@@ -21,7 +21,7 @@
 #include "../src/model/layer/cudnn_dropout.h"
 #ifdef USE_CUDNN
 // cudnn dropout is added in cudnn 5
-#if CUDNN_VERSION_MAJOR >= 5
+#if CUDNN_MAJOR >= 5
 
 #include "gtest/gtest.h"
 
@@ -122,5 +122,5 @@ TEST(CudnnDropout, Backward) {
   EXPECT_FLOAT_EQ(dx[1], dy[1] * GetBitValue(mptr, 1) * scale);
   EXPECT_FLOAT_EQ(dx[7], dy[7] * GetBitValue(mptr, 7) * scale);
 }
-#endif  // CUDNN_VERSION_MAJOR>=5
+#endif  // CUDNN_MAJOR>=5
 #endif  // USE_CUDNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b2ff3c9/test/singa/test_cudnn_rnn.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_rnn.cc b/test/singa/test_cudnn_rnn.cc
index e293cf7..07336a2 100644
--- a/test/singa/test_cudnn_rnn.cc
+++ b/test/singa/test_cudnn_rnn.cc
@@ -21,7 +21,7 @@
 
 #include "../src/model/layer/cudnn_rnn.h"
 #ifdef USE_CUDNN
-#if CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#if CUDNN_VERSION >= 5005
 
 #include "gtest/gtest.h"
 
@@ -177,5 +177,5 @@ TEST_F(TestCudnnRNN, Backward) {
     std::copy(tmp.begin(), tmp.end(), dhyptr.begin());
   }
 }
-#endif  // CUDNN_VERSION_MAJOR >= 5 && CUDNN_VERSION_PATCH >= 5
+#endif  // CUDNN_VERSION >= 5005
 #endif  // USE_CUDNN

[23/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0 - add python installation instruction, remove flask dependency

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0
  - add python installation instruction, remove flask dependency


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/410f238a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/410f238a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/410f238a

Branch: refs/heads/master
Commit: 410f238af4388c174e9a2725baf40153cacb0915
Parents: d3a57cf
Author: aaronwwf <dc...@gmail.com>
Authored: Sun Aug 14 23:51:03 2016 +0800
Committer: aaronwwf <dc...@gmail.com>
Committed: Mon Aug 15 16:21:23 2016 +0800

----------------------------------------------------------------------
 doc/docs/installation.md     |  73 +++++++++++-
 src/python/setup.py.in       |   5 +-
 src/python/singa/__init__.py | 240 --------------------------------------
 src/python/singa/command.py  | 240 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 313 insertions(+), 245 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/doc/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/docs/installation.md b/doc/docs/installation.md
index 8ab617f..6bfdee3 100755
--- a/doc/docs/installation.md
+++ b/doc/docs/installation.md
@@ -3,7 +3,7 @@
 ## Dependencies
 
 ### Required
-* Google Protobuf (>=2.5)
+* Google Protobuf (>=2.5,<3)
 * BLAS (tested with OpenBLAS >=0.2.10)
 * CUDA (tested with 6.5, 7.0 and 7.5)
 * CUDNN (v4 and v5)
@@ -52,7 +52,7 @@ Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
     $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
     $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
 
-You can use `ccmake ..` to configure the compilation options including using
+You can use `cmake ..` to configure the compilation options including using
 LMDB, GLOG, etc.
 
 After compiling SINGA, you can run the unit tests by
@@ -65,5 +65,74 @@ tests, then you have successfully installed SINGA. Please proceed to try the exa
 
 ### MacOS
 
+Currently only Linux OS is officially support.
 
 ### Windows
+
+Currently only Linux OS is officially support.
+
+
+# Install SINGA Python Module
+
+SINGA provide a python binding for python programers. Users can either install from source or 
+from pre-built wheel file.
+
+## Install from source
+
+### Required
+* python(==2.7)   
+* pip(>=1.5)
+* SWIG(>=3.0)   
+* numpy(>=1.11.0)   
+* Google protobuf(>=2.5,<3)   
+
+
+### Configuration
+To build SINGA python package, users should turn on python building switch in cmake config file: "CMakeList.txt"
+
+    OPTION(USE_PYTHON "Generate py wrappers" ON)
+
+### Instructions
+Follow the instructions in the above sections to build SINGA from source,
+
+After that, execute the following commands:
+
+    # under the build directory
+    $ cd python
+    $ sudo pip install . 
+
+Then singa package should be installed in the corresponding python library. 
+
+## Pip Install from wheel 
+
+Install pip if it is not already installed:
+
+    $ sudo apt-get install python-pip python-dev
+
+Then, select the correct binary to install:
+
+    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
+
+    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
+
+    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
+   
+    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
+   
+Install SINGA:
+
+    $ sudo pip install --upgrade $SINGA_WHEEL_URL
+
+### build wheel file from source
+
+Users can build wheel file from source. After build SINGA, execute the following commands:
+
+    # under the build directory
+    $ cd python
+    $ python setup.py bdist_wheel
+
+Then users may get built wheel file under "dist" directory

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/src/python/setup.py.in
----------------------------------------------------------------------
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index d1ac3c9..f2cd9f3 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -42,8 +42,7 @@ setup(
 
     install_requires=[
         'numpy>=1.11.0',
-        'protobuf>=2.5.0,<3',
-        'flask>=0.10.1'
+        'protobuf>=2.5.0,<3'
         ],
 
     #List additional groups of dependencies here (e.g. development
@@ -75,7 +74,7 @@ setup(
 
     entry_points={
         'console_scripts': [
-            'singa=singa:main',
+            'singa=singa.command:main',
         ],
     },
 )

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index f14c8c5..e69de29 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,240 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-This script is the main entrance for user to run singa inside a model workspace
-
-To use this script, user sudo install these dependencies: flask pillow and protobuf
-'''
-
-import sys, glob, os, random, shutil, time
-from flask import Flask, request, redirect, url_for
-import numpy as np
-import ConfigParser
-import urllib, traceback
-
-
-from argparse import ArgumentParser
-from argparse import RawDescriptionHelpFormatter
-sys.path.append(os.getcwd())
-
-__all__ = []
-__version__ = 0.1
-__date__ = '2016-07-20'
-__updated__ = '2016-07-20'
-__shortdesc__ = '''
-welcome to singa
-'''
-
-app = Flask(__name__)
-config = ConfigParser.RawConfigParser()
-service = {}
-data_path = "data_"
-parameter_path = "parameter_"
-
-debug = False
-
-class CLIError(Exception):
-    '''Generic exception to raise and log different fatal errors.'''
-    def __init__(self, msg):
-        super(CLIError).__init__(type(self))
-        self.msg = "E: %s" % msg
-    def __str__(self):
-        return self.msg
-    def __unicode__(self):
-        return self.msg
-
-def main(argv=None): # IGNORE:C0111
-    '''Command line options.'''
-
-    from . import device
-
-    if argv is None:
-        argv = sys.argv
-    else:
-        sys.argv.extend(argv)
-
-    program_name = os.path.basename(sys.argv[0])
-    program_version = "v%s" % __version__
-    program_build_date = str(__updated__)
-    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
-    program_shortdesc = __shortdesc__
-    program_license = '''%s
-
-  Created by dbsystem group on %s.
-  Copyright 2016 NUS School of Computing. All rights reserved.
-
-  Licensed under the Apache License 2.0
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Distributed on an "AS IS" basis without warranties
-  or conditions of any kind, either express or implied.
-
-USAGE
-''' % (program_shortdesc, str(__date__))
-
-    global debug
-
-    try:
-        # Setup argument parser
-        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
-        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
-        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
-        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
-        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
-        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
-        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
-        parser.add_argument('-V', '--version', action='version', version=program_version_message)
-
-        # Process arguments
-        args = parser.parse_args()
-
-        port = args.port
-        parameter_file = args.parameter
-        mode = args.mode
-        need_reload = args.reload_data
-        use_cpu = args.use_cpu
-        debug = args.debug
-
-        #prepare data files
-        config.read('file.cfg')
-        file_prepare(need_reload)
-
-
-        import network as net
-        model = net.create()
-
-        #load parameter
-        parameter_file=get_parameter(parameter_file)
-
-        if parameter_file:
-            print "load parameter file: %s" % parameter_file
-            model.load(parameter_file)
-
-        if use_cpu:
-            raise CLIError("Currently cpu is not support!")
-        else:
-            print "runing with gpu"
-            d = device.create_cuda_gpu()
-
-        model.to_device(d)
-
-        if mode == "serve":
-            print "runing singa in serve mode, listen to  port: %s " % port
-            global service
-            from serve import Service
-            service =Service(model,d)
-
-            app.debug = debug
-            app.run(host='0.0.0.0', port= port)
-        elif mode == "train":
-            print "runing singa in train mode"
-            global trainer
-            from train import Trainer
-            trainer= Trainer(model,d)
-            if not parameter_file:
-                trainer.initialize()
-            trainer.train()
-        else:
-            raise CLIError("Currently only serve mode is surpported!")
-        return 0
-    except KeyboardInterrupt:
-        ### handle keyboard interrupt ###
-        return 0
-    except Exception, e:
-        if debug:
-            traceback.print_exc()
-            raise(e)
-        indent = len(program_name) * " "
-        sys.stderr.write(program_name + ": " + str(e) + "\n")
-        sys.stderr.write(indent + "  for help use --help \n\n")
-        return 2
-
-def file_prepare(reload_data=False):
-    '''
-        download all files and generate data.py
-    '''
-    if not reload_data and os.path.exists("data_.py"):
-        return
-
-    print "download file"
-    #clean data
-    shutil.rmtree("data_.py",ignore_errors=True)
-    shutil.rmtree("data_",ignore_errors=True)
-
-    data_py=open("data_.py",'w')
-    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
-    if config.has_section("data"):
-        file_list = config.items("data")
-        #download files
-        for f in file_list:
-            name,path=download_file(f[0],f[1],data_path)
-            data_py.write("%s=\"%s\"\n" % (name,path))
-
-    data_py.flush()
-    data_py.close()
-
-    if config.has_section("parameter"):
-        parameter_list = config.items("parameter")
-        for p in parameter_list:
-            download_file(p[0],p[1],parameter_path)
-
-def download_file(name,path,dest):
-    '''
-    download one file to dest
-    '''
-    if not os.path.exists(dest):
-        os.makedirs(dest)
-    if (path.startswith('http')):
-        file_name = path.split('/')[-1]
-        target = os.path.join(dest,file_name)
-        urllib.urlretrieve(path,target)
-    return name,target
-
-
-def get_parameter(file_name=None):
-    '''
-    get the paticular file name or get the last parameter file
-    '''
-    if not os.path.exists(parameter_path):
-        os.makedirs(parameter_path)
-        return
-
-    if file_name:
-	return os.path.join(parameter_path,file_name)
-
-    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
-    if len(parameter_list)==0:
-        return
-    parameter_list.sort()
-
-    return parameter_list[-1]
-
-@app.route("/")
-def index():
-    return "Hello SINGA User!"
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    if request.method == 'POST':
-        try:
-            response=service.serve(request)
-        except Exception as e:
-            return e
-        return response
-    return "error, should be post request"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/src/python/singa/command.py
----------------------------------------------------------------------
diff --git a/src/python/singa/command.py b/src/python/singa/command.py
new file mode 100644
index 0000000..f14c8c5
--- /dev/null
+++ b/src/python/singa/command.py
@@ -0,0 +1,240 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+
+'''
+This script is the main entrance for user to run singa inside a model workspace
+
+To use this script, user sudo install these dependencies: flask pillow and protobuf
+'''
+
+import sys, glob, os, random, shutil, time
+from flask import Flask, request, redirect, url_for
+import numpy as np
+import ConfigParser
+import urllib, traceback
+
+
+from argparse import ArgumentParser
+from argparse import RawDescriptionHelpFormatter
+sys.path.append(os.getcwd())
+
+__all__ = []
+__version__ = 0.1
+__date__ = '2016-07-20'
+__updated__ = '2016-07-20'
+__shortdesc__ = '''
+welcome to singa
+'''
+
+app = Flask(__name__)
+config = ConfigParser.RawConfigParser()
+service = {}
+data_path = "data_"
+parameter_path = "parameter_"
+
+debug = False
+
+class CLIError(Exception):
+    '''Generic exception to raise and log different fatal errors.'''
+    def __init__(self, msg):
+        super(CLIError).__init__(type(self))
+        self.msg = "E: %s" % msg
+    def __str__(self):
+        return self.msg
+    def __unicode__(self):
+        return self.msg
+
+def main(argv=None): # IGNORE:C0111
+    '''Command line options.'''
+
+    from . import device
+
+    if argv is None:
+        argv = sys.argv
+    else:
+        sys.argv.extend(argv)
+
+    program_name = os.path.basename(sys.argv[0])
+    program_version = "v%s" % __version__
+    program_build_date = str(__updated__)
+    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
+    program_shortdesc = __shortdesc__
+    program_license = '''%s
+
+  Created by dbsystem group on %s.
+  Copyright 2016 NUS School of Computing. All rights reserved.
+
+  Licensed under the Apache License 2.0
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Distributed on an "AS IS" basis without warranties
+  or conditions of any kind, either express or implied.
+
+USAGE
+''' % (program_shortdesc, str(__date__))
+
+    global debug
+
+    try:
+        # Setup argument parser
+        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
+        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
+        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
+        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
+        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
+        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
+        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
+        parser.add_argument('-V', '--version', action='version', version=program_version_message)
+
+        # Process arguments
+        args = parser.parse_args()
+
+        port = args.port
+        parameter_file = args.parameter
+        mode = args.mode
+        need_reload = args.reload_data
+        use_cpu = args.use_cpu
+        debug = args.debug
+
+        #prepare data files
+        config.read('file.cfg')
+        file_prepare(need_reload)
+
+
+        import network as net
+        model = net.create()
+
+        #load parameter
+        parameter_file=get_parameter(parameter_file)
+
+        if parameter_file:
+            print "load parameter file: %s" % parameter_file
+            model.load(parameter_file)
+
+        if use_cpu:
+            raise CLIError("Currently cpu is not support!")
+        else:
+            print "runing with gpu"
+            d = device.create_cuda_gpu()
+
+        model.to_device(d)
+
+        if mode == "serve":
+            print "runing singa in serve mode, listen to  port: %s " % port
+            global service
+            from serve import Service
+            service =Service(model,d)
+
+            app.debug = debug
+            app.run(host='0.0.0.0', port= port)
+        elif mode == "train":
+            print "runing singa in train mode"
+            global trainer
+            from train import Trainer
+            trainer= Trainer(model,d)
+            if not parameter_file:
+                trainer.initialize()
+            trainer.train()
+        else:
+            raise CLIError("Currently only serve mode is surpported!")
+        return 0
+    except KeyboardInterrupt:
+        ### handle keyboard interrupt ###
+        return 0
+    except Exception, e:
+        if debug:
+            traceback.print_exc()
+            raise(e)
+        indent = len(program_name) * " "
+        sys.stderr.write(program_name + ": " + str(e) + "\n")
+        sys.stderr.write(indent + "  for help use --help \n\n")
+        return 2
+
+def file_prepare(reload_data=False):
+    '''
+        download all files and generate data.py
+    '''
+    if not reload_data and os.path.exists("data_.py"):
+        return
+
+    print "download file"
+    #clean data
+    shutil.rmtree("data_.py",ignore_errors=True)
+    shutil.rmtree("data_",ignore_errors=True)
+
+    data_py=open("data_.py",'w')
+    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
+    if config.has_section("data"):
+        file_list = config.items("data")
+        #download files
+        for f in file_list:
+            name,path=download_file(f[0],f[1],data_path)
+            data_py.write("%s=\"%s\"\n" % (name,path))
+
+    data_py.flush()
+    data_py.close()
+
+    if config.has_section("parameter"):
+        parameter_list = config.items("parameter")
+        for p in parameter_list:
+            download_file(p[0],p[1],parameter_path)
+
+def download_file(name,path,dest):
+    '''
+    download one file to dest
+    '''
+    if not os.path.exists(dest):
+        os.makedirs(dest)
+    if (path.startswith('http')):
+        file_name = path.split('/')[-1]
+        target = os.path.join(dest,file_name)
+        urllib.urlretrieve(path,target)
+    return name,target
+
+
+def get_parameter(file_name=None):
+    '''
+    get the paticular file name or get the last parameter file
+    '''
+    if not os.path.exists(parameter_path):
+        os.makedirs(parameter_path)
+        return
+
+    if file_name:
+	return os.path.join(parameter_path,file_name)
+
+    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
+    if len(parameter_list)==0:
+        return
+    parameter_list.sort()
+
+    return parameter_list[-1]
+
+@app.route("/")
+def index():
+    return "Hello SINGA User!"
+
+@app.route('/predict', methods=['POST'])
+def predict():
+    if request.method == 'POST':
+        try:
+            response=service.serve(request)
+        except Exception as e:
+            return e
+        return response
+    return "error, should be post request"

[50/51] [abbrv] incubator-singa git commit: Preparing for V1.0 RC0.

Posted by wa...@apache.org.

Preparing for V1.0 RC0.

Updated licenses and release notes.
Added the link of v0.3 in layout.html


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ed9587c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ed9587c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ed9587c0

Branch: refs/heads/master
Commit: ed9587c0a4f354cedb89326c956c01cadf224af1
Parents: 9ee16e0
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 18 01:18:36 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 18 01:54:18 2016 +0800

----------------------------------------------------------------------
 LICENSE                                       |   42 +-
 NOTICE                                        |    2 +-
 README.md                                     |    7 +-
 RELEASE_NOTES                                 |   88 ++
 cmake/Thirdparty/FindOpenCL.cmake             |  170 ---
 conf/hostfile                                 |    1 -
 conf/profile                                  |    3 -
 conf/singa.conf                               |    7 -
 doc/_templates/layout.html                    |   11 +-
 doc/build.sh                                  |   17 +
 doc/conf.py                                   |   29 +-
 doc/en/_templates/layout.html                 |   11 +-
 doc/en/community/mail-lists.rst               |   18 +
 doc/en/community/team-list.rst                |   20 +-
 doc/en/develop/schedule.rst                   |   18 +
 doc/en/docs.rst                               |   18 +
 doc/en/docs/device.rst                        |   18 +
 doc/en/docs/index.rst                         |   18 +
 doc/en/docs/initializer.rst                   |   18 +
 doc/en/docs/layer.rst                         |   18 +
 doc/en/docs/loss.rst                          |   18 +
 doc/en/docs/metric.rst                        |   18 +
 doc/en/docs/optimizer.rst                     |   18 +
 doc/en/docs/tensor.rst                        |   18 +
 doc/en/docs/utils.rst                         |   18 +
 doc/en/index.rst                              |   18 +
 doc/zh/index.rst                              |   18 +
 examples/cifar10/README.md                    |    4 +-
 examples/cifar10/resnet.py                    |    5 +-
 examples/cifar10/train.py                     |    5 +-
 rat-excludes                                  |    1 -
 rat_check                                     | 1108 --------------------
 src/CMakeLists.txt                            |   14 +-
 tool/python/README.md                         |  375 -------
 tool/python/examples/__init__.py              |   22 -
 tool/python/examples/cifar10_cnn.py           |   55 -
 tool/python/examples/cifar10_cnn_cudnn.py     |   57 -
 tool/python/examples/cifar10_cnn_parameter.py |   57 -
 tool/python/examples/datasets/__init__.py     |   22 -
 tool/python/examples/datasets/cifar10.py      |   57 -
 tool/python/examples/datasets/mnist.py        |   55 -
 tool/python/examples/mnist_ae.py              |   48 -
 tool/python/examples/mnist_mlp.py             |   55 -
 tool/python/examples/mnist_mlp_parameter.py   |   50 -
 tool/python/examples/mnist_mlp_test.py        |   52 -
 tool/python/examples/mnist_rbm1.py            |   46 -
 tool/python/examples/mnist_rbm2.py            |   47 -
 tool/python/examples/mnist_rbm3.py            |   47 -
 tool/python/examples/mnist_rbm4.py            |   47 -
 tool/python/examples/train_cifar10.py         |  142 ---
 tool/python/examples/train_mnist.py           |  117 ---
 tool/python/singa.py                          |   46 -
 tool/python/singa/__init__.py                 |   22 -
 tool/python/singa/driver.i                    |  117 ---
 tool/python/singa/generatepy.sh               |   26 -
 tool/python/singa/initializations.py          |   67 --
 tool/python/singa/layer.py                    |  693 ------------
 tool/python/singa/model.py                    |  716 -------------
 tool/python/singa/parameter.py                |  140 ---
 tool/python/singa/utils/__init__.py           |   22 -
 tool/python/singa/utils/message.py            |   80 --
 tool/python/singa/utils/utility.py            |   86 --
 62 files changed, 458 insertions(+), 4705 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index 75001c1..4f9f923 100644
--- a/LICENSE
+++ b/LICENSE
@@ -205,25 +205,35 @@ notices and license terms. Your use of the source code for the these
 subcomponents is subject to the terms and conditions of the following
 licenses.
 
+
 ============================================================================
-SINGA bundles the following under BSD 2-clause license: include/singa/utils/blob.h,
-src/utils/blob.cc, include/singa/utils/common.h, src/utils/common.cc, include/singa/utils/cuda_utils.h
+SINGA bundles the following under BSD 3-clause license:
+cmake/Profobuf.cmake
 
-Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
-Copyright (c) 2014, the respective contributors
-https://github.com/BVLC/caffe/blob/master/LICENSE
+Copyright 2009 Kitware, Inc.
+Copyright 2009-2011 Philip Lowman <ph...@yhbt.com>
+Copyright 2008 Esben Mose Hansen, Ange Optimization ApS
 
 =====================================================================
-SINGA bundles the following under BSD 2-clause license: include/singa/utils/tinydir.h
+SINGA bundles the following under BSD 2-clause license:
+include/singa/utils/tinydir.h
 
 Copyright (c) 2013, Cong Xu, Baudouin Feildel
 https://github.com/cxong/tinydir/blob/master/COPYING
 
-=====================================================================
-SINGA bundles the following under Apache v2.0 license: include/mshadow/*
 
-Copyright (c) 2014 by Contributors
-https://github.com/dmlc/mshadow/blob/master/LICENSE
+===========================================================================
+SINGA bundles the following under BSD 2-clause license:
+include/singa/utils/cuda_utils.h, src/core/tensor/distribution.cl
+
+All contributions by the University of California:
+Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
+All rights reserved.
+
+All other contributions:
+Copyright (c) 2014, 2015, the respective contributors
+All rights reserved.
+https://github.com/BVLC/caffe/blob/master/LICENSE
 
 =====================================================================
 SINGA bundles the following under New BSD license: include/gtest/*
@@ -238,9 +248,13 @@ Copyright (c) 2009 Google Inc. All rights reserved.
 https://github.com/google/styleguide/tree/gh-pages/cpplint
 
 =====================================================================
-SINGA bundles the following under New BSD license: examples/rnnlm/create_data.cc
+SINGA bundles the following under New BSD license: lib/cnmem/*
+
+Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+https://github.com/NVIDIA/cnmem
+
+=====================================================================
+SINGA bundles the following under New BSD license: src/python/swig/numpy.i
 
-Copyright (c) 2010-2012 Tomas Mikolov
-Copyright (c) 2013 Cantab Research Ltd
+Copyright (c) 2005-2015, NumPy Developers.
 All rights reserved.
-http://www.fit.vutbr.cz/~imikolov/rnnlm/

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index c74e53a..092ec36 100644
--- a/NOTICE
+++ b/NOTICE
@@ -4,4 +4,4 @@ Copyright 2016 The Apache Software Foundation
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 
-Portions of this software were developed at the National University of Singapore.
+Codebase originally donated by National University of Singapore.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 07acd68..fc80db4 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,11 @@
 
 Distributed deep learning system
 
-This is the dev branch for V1.0, please refer to the following docs for compilation and examples
 
-* [Compilation](doc/docs/installation.md)
-* [CNN example](doc/docs/cnn.md)
+## Quick Start
+
+* [Installation](doc/en/docs/installation.md)
+* [Examples](examples)
 
 ##Mailing Lists
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/RELEASE_NOTES
----------------------------------------------------------------------
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 5786ad2..e36dce8 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,91 @@
+Release Notes - SINGA - Version singa-incubating-1.0.0
+
+SINGA is a general distributed deep learning platform for training big deep learning models over large datasets.
+
+This release includes following features:
+
+  * Core abstractions including Tensor and Device
+      * [SINGA-207]  Update Tensor functions for matrices
+      * [SINGA-205]  Enable slice and concatenate operations for Tensor objects
+      * [SINGA-197]  Add CNMem as a submodule in lib/
+      * [SINGA-196]  Rename class Blob to Block
+      * [SINGA-194]  Add a Platform singleton
+      * [SINGA-175]  Add memory management APIs and implement a subclass using CNMeM
+      * [SINGA-173]  * [SINGA-162 OpenCL Implementation
+      * [SINGA-171]  Create CppDevice and CudaDevice
+      * [SINGA-168]  Implement Cpp Math functions APIs
+      * [SINGA-162]  Overview of features for V1.x
+      * [SINGA-165]  Add cross-platform timer API to singa
+      * [SINGA-167]  Add Tensor Math function APIs
+      * [SINGA-166]  light built-in logging for making glog optional
+      * [SINGA-164]  Add the base Tensor class
+
+
+  * IO components for file read/write, network and data pre-processing
+      * [SINGA-233]  New communication interface
+      * [SINGA-215]  Implement Image Transformation for Image Pre-processing
+      * [SINGA-214]  Add LMDBReader and LMDBWriter for LMDB
+      * [SINGA-213]  Implement Encoder and Decoder for CSV
+      * [SINGA-211]  Add TextFileReader and TextFileWriter for CSV files
+      * [SINGA-210]  Enable checkpoint and resume for v1.0
+      * [SINGA-208]  Add DataIter base class and a simple implementation
+      * [SINGA-203]  Add OpenCV detection for cmake compilation
+      * [SINGA-202]  Add reader and writer for binary file
+      * [SINGA-200]  Implement Encoder and Decoder for data pre-processing
+
+
+
+  * Module components including layer classes, training algorithms and Python binding
+      * [SINGA-235]  Unify the engines for cudnn and singa layers
+      * [SINGA-230]  OpenCL Convolution layer and Pooling layer
+      * [SINGA-222]  Fixed bugs in IO
+      * [SINGA-218]  Implementation for RNN CUDNN version
+      * [SINGA-204]  Support the training of feed-forward neural nets
+      * [SINGA-199]  Implement Python classes for SGD optimizers
+      * [SINGA-198]  Change Layer::Setup API to include input Tensor shapes
+      * [SINGA-193]  Add Python layers
+      * [SINGA-192]  Implement optimization algorithms for Singa v1 (nesterove, adagrad, rmsprop)
+      * [SINGA-191]  Add "autotune" for CudnnConvolution Layer
+      * [SINGA-190]  Add prelu layer and flatten layer
+      * [SINGA-189]  Generate python outputs of proto files
+      * [SINGA-188]  Add Dense layer
+      * [SINGA-187]  Add popular parameter initialization methods
+      * [SINGA-186]  Create Python Tensor class
+      * [SINGA-184]  Add Cross Entropy loss computation
+      * [SINGA-183]  Add the base classes for optimizer, constraint and regularizer
+      * [SINGA-180]  Add Activation layer and Softmax layer
+      * [SINGA-178]  Add Convolution layer and Pooling layer
+      * [SINGA-176]  Add loss and metric base classes
+      * [SINGA-174]  Add Batch Normalization layer and Local Response Nomalization layer.
+      * [SINGA-170]  Add Dropout layer and CudnnDropout layer.
+      * [SINGA-169]  Add base Layer class for V1.0
+
+
+  * Examples
+      * [SINGA-232]  Alexnet on Imagenet
+      * [SINGA-231]  Batchnormlized VGG model for cifar-10
+      * [SINGA-228]  Add Cpp Version of Convolution and Pooling layer
+      * [SINGA-227]  Add Split and Merge Layer and add ResNet Implementation
+
+  * Documentation
+      * [SINGA-239]  Transfer documentation files of v0.3.0 to github
+      * [SINGA-238]  RBM on mnist
+      * [SINGA-225]  Documentation for installation and Cifar10 example
+      * [SINGA-223]  Use Sphinx to create the website
+
+  * Tools for compilation and some utility code
+      * [SINGA-229]  Complete install targets
+      * [SINGA-221]  Support for Travis-CI
+      * [SINGA-217]  build python package with setup.py
+      * [SINGA-216]  add jenkins for CI support
+      * [SINGA-212]  Disable the compilation of libcnmem if USE_CUDA is OFF
+      * [SINGA-195]  Channel for sending training statistics
+      * [SINGA-185]  Add CBLAS and GLOG detection for singav1
+      * [SINGA-181]  Add NVCC supporting for .cu files
+      * [SINGA-177]  Add fully cmake supporting for the compilation of singa_v1
+      * [SINGA-172]  Add CMake supporting for Cuda and Cudnn libs
+
+----------------------------------------------------------
 Release Notes - SINGA - Version singa-incubating-0.3.0
 
 SINGA is a general distributed deep learning platform for training big deep learning models over large datasets.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/cmake/Thirdparty/FindOpenCL.cmake
----------------------------------------------------------------------
diff --git a/cmake/Thirdparty/FindOpenCL.cmake b/cmake/Thirdparty/FindOpenCL.cmake
deleted file mode 100644
index 3c7daeb..0000000
--- a/cmake/Thirdparty/FindOpenCL.cmake
+++ /dev/null
@@ -1,170 +0,0 @@
-# This script was taken from https://github.com/elhigu/cmake-findopencl
-# and modified to support finding OpenCL 2.x C++ bindings.
-
-# Find OpenCL
-#
-# To set manually the paths, define these environment variables:
-# OpenCL_INCPATH    - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include)
-# OpenCL_LIBPATH    - Library path (e.h. OpenCL_LIBPATH=/usr/lib64/nvidia)
-#
-# Once done this will define
-#  OPENCL_FOUND            - system has OpenCL
-#  OPENCL_INCLUDE_DIRS     - the OpenCL include directory
-#  OPENCL_LIBRARIES        - link these to use OpenCL
-#  OPENCL_HAS_CPP_BINDINGS - system has also cl2.hpp
-
-FIND_PACKAGE(PackageHandleStandardArgs)
-
-SET (OPENCL_VERSION_STRING "0.1.0")
-SET (OPENCL_VERSION_MAJOR 0)
-SET (OPENCL_VERSION_MINOR 1)
-SET (OPENCL_VERSION_PATCH 0)
-
-IF (APPLE)
-
-	# IF OpenCL_LIBPATH is given use it and don't use default path
-	IF (DEFINED ENV{OpenCL_LIBPATH})
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL PATHS ENV OpenCL_LIBPATH NO_DEFAULT_PATH)
-	ELSE ()
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
-	ENDIF ()
-
-	# IF OpenCL_INCPATH is given use it and find for CL/cl.h and OpenCL/cl.h do not try to find default paths
-	IF (DEFINED ENV{OpenCL_INCPATH})
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h OpenCL/cl.h PATHS ENV OpenCL_INCPATH NO_DEFAULT_PATH)
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl2.hpp OpenCL/cl2.hpp PATHS ${OPENCL_INCLUDE_DIRS} NO_DEFAULT_PATH)
-	ELSE ()
-		FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl2.hpp DOC "Include for OpenCL CPP bindings on OSX")
-	ENDIF ()
-
-ELSE (APPLE)
-
-	IF (WIN32)
-
-		# Find OpenCL includes and libraries from environment variables provided by vendor
-		SET(OPENCL_INCLUDE_SEARCH_PATHS)
-		SET(OPENCL_LIBRARY_SEARCH_PATHS)
-		SET(OPENCL_LIBRARY_64_SEARCH_PATHS)
-
-		# Nvidia
-		IF (DEFINED ENV{CUDA_INC_PATH})
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{CUDA_INC_PATH})
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{CUDA_LIB_PATH}/../lib64)
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{CUDA_LIB_PATH}/../lib)
-		ENDIF()
-
-		# Intel SDK
-		IF (DEFINED ENV{INTELOCSDKROOT})
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/include)
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/lib/x64)
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/lib/x86)
-		ENDIF()
-
-		# AMD SDK
-		IF (DEFINED ENV{AMDAPPSDKROOT})
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/include)
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/lib/x86_64)
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/lib/x86)
-		ENDIF()
-
-		# Override search paths with OpenCL_INCPATH env variable
-		IF (DEFINED ENV{OpenCL_INCPATH})
-			SET(OPENCL_INCLUDE_SEARCH_PATHS $ENV{OpenCL_INCPATH})
-		ENDIF ()
-
-		# Override search paths with OpenCL_LIBPATH env variable
-		IF (DEFINED ENV{OpenCL_INCPATH})
-			SET(OPENCL_LIBRARY_SEARCH_PATHS $ENV{OpenCL_LIBPATH})
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{OpenCL_LIBPATH})
-		ENDIF ()
-
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${OPENCL_INCLUDE_SEARCH_PATHS})
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl2.hpp PATHS ${OPENCL_INCLUDE_SEARCH_PATHS})
-		
-		FIND_LIBRARY(_OPENCL_32_LIBRARIES OpenCL.lib HINTS ${OPENCL_LIBRARY_SEARCH_PATHS} PATHS ${OPENCL_LIB_DIR} ENV PATH)
-		FIND_LIBRARY(_OPENCL_64_LIBRARIES OpenCL.lib HINTS ${OPENCL_LIBRARY_64_SEARCH_PATHS} PATHS ${OPENCL_LIB_DIR} ENV PATH)
-
-		# Check if 64bit or 32bit versions links fine
-  		SET (_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/openclversion.c")
-  		#SET (_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}/test.c")
-		FILE (WRITE "${_OPENCL_VERSION_SOURCE}"
-			"
-			#if __APPLE__
-			#include <OpenCL/cl.h>
-			#else /* !__APPLE__ */
-			#include <CL/cl.h>
-			#endif /* __APPLE__ */
-			int main()
-			{	
-			    cl_int result;
-			    cl_platform_id id;
-			    result = clGetPlatformIDs(1, &id, NULL);
-			    return result != CL_SUCCESS;
-			}
-			")
-
-  		TRY_COMPILE(_OPENCL_64_COMPILE_SUCCESS ${CMAKE_BINARY_DIR} "${_OPENCL_VERSION_SOURCE}"
-			CMAKE_FLAGS
-			"-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}"
-			CMAKE_FLAGS
-			"-DLINK_LIBRARIES:STRING=${_OPENCL_64_LIBRARIES}"
-  		)
-
-		IF(_OPENCL_64_COMPILE_SUCCESS)
-			message(STATUS "OpenCL 64bit lib found.")
-			SET(OPENCL_LIBRARIES ${_OPENCL_64_LIBRARIES})
-  		ELSE()
-	  		TRY_COMPILE(_OPENCL_32_COMPILE_SUCCESS ${CMAKE_BINARY_DIR} "${_OPENCL_VERSION_SOURCE}"
-				CMAKE_FLAGS
-				"-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}"
-				CMAKE_FLAGS
-				"-DLINK_LIBRARIES:STRING=${_OPENCL_32_LIBRARIES}"
-	  		)
-			IF(_OPENCL_32_COMPILE_SUCCESS)
-				message(STATUS "OpenCL 32bit lib found.")
-				SET(OPENCL_LIBRARIES ${_OPENCL_32_LIBRARIES})
-			ELSE()
-				message(STATUS "Couldn't link opencl..")
-			ENDIF()
-		ENDIF()
-
-
-	ELSE (WIN32)
-  
-  		IF (CYGWIN)
-    		SET (CMAKE_FIND_LIBRARY_SUFFIXES .lib)
-    		SET (OCL_LIB_SUFFIX .lib)
-  		ENDIF (CYGWIN)
-
-		# Unix style platforms
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL${OCL_LIB_SUFFIX}
-			PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH
-		)
-
-		GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH)
-		GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
-
-		# The AMD SDK currently does not place its headers
-		# in /usr/include, therefore also search relative
-		# to the library
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH)
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl2.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH)
-
-	ENDIF (WIN32)
-
-ENDIF (APPLE)
-
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS)
-
-IF(_OPENCL_CPP_INCLUDE_DIRS)
-	SET( OPENCL_HAS_CPP_BINDINGS TRUE )
-	LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} )
-	# This is often the same, so clean up
-	LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS )
-ENDIF(_OPENCL_CPP_INCLUDE_DIRS)
-
-MARK_AS_ADVANCED(
-  OPENCL_INCLUDE_DIRS
-)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/conf/hostfile
----------------------------------------------------------------------
diff --git a/conf/hostfile b/conf/hostfile
deleted file mode 100644
index 2fbb50c..0000000
--- a/conf/hostfile
+++ /dev/null
@@ -1 +0,0 @@
-localhost

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/conf/profile
----------------------------------------------------------------------
diff --git a/conf/profile b/conf/profile
deleted file mode 100644
index 72a8600..0000000
--- a/conf/profile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Please add here the environment variables that cannot be recognized after ssh.
-# This file will be `source`ed upon ssh
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/conf/singa.conf
----------------------------------------------------------------------
diff --git a/conf/singa.conf b/conf/singa.conf
deleted file mode 100644
index 20cff98..0000000
--- a/conf/singa.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-# point to your active zookeeper service
-# this is comma separated host:port pairs, each corresponding to a zk server
-# e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002"
-zookeeper_host: "localhost:2181"
-
-# set if you want to change log directory
-log_dir: "/tmp/singa-log/"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
index 1c10c5b..b149652 100755
--- a/doc/_templates/layout.html
+++ b/doc/_templates/layout.html
@@ -20,14 +20,14 @@
 {% block extrahead %}
     <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
 {% endblock %}
-     
+
 {% block footer %}
 
 <div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
 <a href="http://incubator.apache.org/">
-<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">
 </a>
- 
+
   <span class="rst-current-version" data-toggle="rst-current-version">
     <span class="fa fa-book"> incubator-singa </span>
     v: {{ version }}
@@ -39,6 +39,11 @@
             <dd><a href="{{ pathto('../en/index.html', 1) }}">English</a></dd>
             <dd><a href="{{ pathto('../zh/index.html', 1) }}">\u4e2d\u6587</a></dd>
         </dl>
+        <dl>
+            <dt>Versions</dt>
+            <dd><a href="http://singa.apache.org/v0.3.0/">0.3</a></dd>
+        </dl>
+
     </div>
 </div>
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/build.sh
----------------------------------------------------------------------
diff --git a/doc/build.sh b/doc/build.sh
index db987d6..eb5b90c 100755
--- a/doc/build.sh
+++ b/doc/build.sh
@@ -1,5 +1,22 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
 SPHINXBUILD="sphinx-build"
 BUILDDIR="_build"
 LANG_ARR=(en zh)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..08e391e 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -3,6 +3,22 @@
 # incubator-singa documentation build configuration file, created by
 # sphinx-quickstart on Sat Jul  9 20:36:57 2016.
 #
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # This file is execfile()d with the current directory set to its
 # containing dir.
 #
@@ -16,9 +32,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python/'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -34,10 +51,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-   
-]
-
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/en/_templates/layout.html b/doc/en/_templates/layout.html
index 590e578..2f9ca0d 100755
--- a/doc/en/_templates/layout.html
+++ b/doc/en/_templates/layout.html
@@ -16,7 +16,7 @@
  limitations under the License.
 #}
 {% extends "!layout.html" %}
-     
+
 {% block extrahead %}
     <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
 {% endblock %}
@@ -25,7 +25,7 @@
 
 <div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
 <a href="http://incubator.apache.org/">
-<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">
 </a>
 
   <span class="rst-current-version" data-toggle="rst-current-version">
@@ -36,18 +36,13 @@
   <div class="rst-other-versions">
     <dl>
        <dd><a href="">English</a></dd>
-       <dd><a href="{{pathto('zh/'+ 'index.html' , 1) }}">\u4e2d\u6587</a></dd>	  
+       <dd><a href="{{pathto('zh/'+ 'index.html' , 1) }}">\u4e2d\u6587</a></dd>
 	  <!--dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
 	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
 	  <dd><a href="/it/latest/">Italiano</a></dd>
 	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd-->
     </dl>
     </dl>
-    <dl>
-      <dt>Versions</dt>
-      <dd><a href="/{{ language }}/latest/">latest</a></dd>
-      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
-    </dl>
   </div>
 </div>
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/community/mail-lists.rst
----------------------------------------------------------------------
diff --git a/doc/en/community/mail-lists.rst b/doc/en/community/mail-lists.rst
index 02b39de..a170042 100644
--- a/doc/en/community/mail-lists.rst
+++ b/doc/en/community/mail-lists.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Project Mailing Lists
 =====================
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/community/team-list.rst
----------------------------------------------------------------------
diff --git a/doc/en/community/team-list.rst b/doc/en/community/team-list.rst
index a677aff..abff0a8 100644
--- a/doc/en/community/team-list.rst
+++ b/doc/en/community/team-list.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 The SINGA Team
 ==============
 
@@ -25,7 +43,7 @@ Developers
 +-------------------+--------------------------------+----------------------------------------------+
 | Haibo Chen        | hzchenhaibo@corp.netease.com   |  NetEase                                     |
 +-------------------+--------------------------------+----------------------------------------------+
-| Anh Dinh	    |     dinhtta@apache.org	     |         National University of Singapore     |                       
+| Anh Dinh	    |     dinhtta@apache.org	     |         National University of Singapore     |
 +-------------------+--------------------------------+----------------------------------------------+
 | Jinyang Gao	    |     jinyang@apache.org	     |         National University of Singapore	    |
 +-------------------+--------------------------------+----------------------------------------------+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/develop/schedule.rst
----------------------------------------------------------------------
diff --git a/doc/en/develop/schedule.rst b/doc/en/develop/schedule.rst
index 2cf81f1..73d713c 100644
--- a/doc/en/develop/schedule.rst
+++ b/doc/en/develop/schedule.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Development Schedule
 ====================
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs.rst b/doc/en/docs.rst
index c1b143b..1b94d02 100644
--- a/doc/en/docs.rst
+++ b/doc/en/docs.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Documentation
 =============
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/device.rst b/doc/en/docs/device.rst
index 53faf48..57993f9 100644
--- a/doc/en/docs/device.rst
+++ b/doc/en/docs/device.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Device
 =======
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/index.rst b/doc/en/docs/index.rst
index a2ea540..d6d7516 100644
--- a/doc/en/docs/index.rst
+++ b/doc/en/docs/index.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Documentation
 =============
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/initializer.rst b/doc/en/docs/initializer.rst
index f334497..6790a8e 100644
--- a/doc/en/docs/initializer.rst
+++ b/doc/en/docs/initializer.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Initializer
 ===========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/layer.rst b/doc/en/docs/layer.rst
index 62ef3c3..1a576f1 100644
--- a/doc/en/docs/layer.rst
+++ b/doc/en/docs/layer.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Layer
 ======
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/loss.rst b/doc/en/docs/loss.rst
index 27872dd..18c587a 100644
--- a/doc/en/docs/loss.rst
+++ b/doc/en/docs/loss.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Loss
 =========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/metric.rst b/doc/en/docs/metric.rst
index 35fa24e..20a7144 100644
--- a/doc/en/docs/metric.rst
+++ b/doc/en/docs/metric.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Metric
 =========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/optimizer.rst b/doc/en/docs/optimizer.rst
index 486c01e..e6f1da9 100644
--- a/doc/en/docs/optimizer.rst
+++ b/doc/en/docs/optimizer.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Optimizer
 =========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/tensor.rst b/doc/en/docs/tensor.rst
index ff6142e..d9e7f18 100644
--- a/doc/en/docs/tensor.rst
+++ b/doc/en/docs/tensor.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Tensor
 ========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/utils.rst b/doc/en/docs/utils.rst
index 5306719..4736ce1 100644
--- a/doc/en/docs/utils.rst
+++ b/doc/en/docs/utils.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 Misc.
 =========
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/en/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/index.rst b/doc/en/index.rst
index 1bbbe9a..bdf5b1d 100755
--- a/doc/en/index.rst
+++ b/doc/en/index.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 .. Singa documentation master file, created by
    sphinx-quickstart on Sat Jul  9 20:36:57 2016.
    You can adapt this file completely to your liking, but it should at least

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/doc/zh/index.rst
----------------------------------------------------------------------
diff --git a/doc/zh/index.rst b/doc/zh/index.rst
index 4b49d5f..3d59dd3 100644
--- a/doc/zh/index.rst
+++ b/doc/zh/index.rst
@@ -1,3 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+
 SINGA \u4e2d\u6587\u6587\u6863
 ==============
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/examples/cifar10/README.md
----------------------------------------------------------------------
diff --git a/examples/cifar10/README.md b/examples/cifar10/README.md
index 8076347..bd5ed7d 100644
--- a/examples/cifar10/README.md
+++ b/examples/cifar10/README.md
@@ -48,13 +48,13 @@ version of the Cifar-10 dataset in 'cifar-10-batches-py' folder.
 
 2. alexnet.cc. It trains the AlexNet model using the CPP APIs on a CudaGPU,
 
-        run.sh
+        ./run.sh
 
 3. alexnet-parallel.cc. It trains the AlexNet model using the CPP APIs on two CudaGPU devices.
 The two devices run synchronously to compute the gradients of the mode parameters, which are
 averaged on the host CPU device and then be applied to update the parameters.
 
-        run-parallel.sh
+        ./run-parallel.sh
 
 4. vgg-parallel.cc. It train the VGG model using the CPP APIs on two CudaGPU devices similar to alexnet-parallel.cc.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/examples/cifar10/resnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/resnet.py b/examples/cifar10/resnet.py
index 477c5c7..6b573e9 100644
--- a/examples/cifar10/resnet.py
+++ b/examples/cifar10/resnet.py
@@ -49,7 +49,10 @@ def Block(net, name, nb_filters, stride):
         net.add(layer.Merge(name + "-merge"), [br1bn2, split])
 
 
-def create_net():
+def create_net(use_cpu=False):
+    if use_cpu:
+        layer.engine = 'singacpp'
+
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     net.add(layer.Conv2D("conv1", 16, 3, 1, pad=1, input_sample_shape=(3, 32, 32)))
     net.add(layer.BatchNormalization("bn1"))

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index b08ae3c..d2d70df 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -181,5 +181,6 @@ if __name__ == '__main__':
               use_cpu=args.use_cpu)
     else:
         train_x, test_x = normalize_for_alexnet(train_x, test_x)
-        net = resnet.create_net()
-        train((train_x, train_y, test_x, test_y), net, 200, resnet_lr, 1e-4)
+        net = resnet.create_net(args.use_cpu)
+        train((train_x, train_y, test_x, test_y), net, 200, resnet_lr, 1e-4,
+              use_cpu=args.use_cpu)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/rat-excludes
----------------------------------------------------------------------
diff --git a/rat-excludes b/rat-excludes
index 63cb327..f7f79d2 100644
--- a/rat-excludes
+++ b/rat-excludes
@@ -3,7 +3,6 @@ Doxyfile
 Makefile.*
 configure
 .gitignore
-conf/*
 doc/*
 config/*
 \.dirstamp

[11/51] [abbrv] incubator-singa git commit: SINGA-238 RBM on mnist

Posted by wa...@apache.org.

SINGA-238 RBM on mnist

Implement RBM python version on mnist data set
1. The model is following: http://www.cs.toronto.edu/~hinton/science.pdf
2. This model is implemented using python tensors
3. Users should first download mnist.pkl.gz


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e1a524d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e1a524d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e1a524d1

Branch: refs/heads/master
Commit: e1a524d1f428fa4289bdaee48e3b82acac6c0260
Parents: a91bf2a
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Tue Aug 9 23:36:49 2016 +0800
Committer: zhaojing <zh...@comp.nus.edu.sg>
Committed: Sun Aug 14 13:29:54 2016 +0800

----------------------------------------------------------------------
 examples/mnist/README.md           |   3 +
 examples/mnist/train.py            | 131 ++++++++++++++++++++++++++++++++
 include/singa/core/tensor.h        |  19 +++++
 src/core/tensor/math_kernel.cu     |  48 +++++++++++-
 src/core/tensor/math_kernel.h      |  12 +++
 src/core/tensor/tensor.cc          |   5 +-
 src/core/tensor/tensor_math.h      |  24 ++++++
 src/core/tensor/tensor_math_cpp.h  |  42 ++++++++++
 src/core/tensor/tensor_math_cuda.h |  35 ++++++++-
 src/python/singa/optimizer.py      |   6 +-
 src/python/singa/tensor.py         |  20 ++++-
 src/python/swig/core_tensor.i      |  10 +++
 12 files changed, 343 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/examples/mnist/README.md
----------------------------------------------------------------------
diff --git a/examples/mnist/README.md b/examples/mnist/README.md
new file mode 100644
index 0000000..bfd480f
--- /dev/null
+++ b/examples/mnist/README.md
@@ -0,0 +1,3 @@
+This example is to train an RBM model using mnist data set. This RBM follows paper http://www.cs.toronto.edu/~hinton/science.pdf and the source code for this paper can be found http://www.cs.toronto.edu/~hinton/MatlabForSciencePaper.html
+1. Download dataset mnist.pkl.gz from https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz
+2. $ python train.py

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
new file mode 100644
index 0000000..52b023a
--- /dev/null
+++ b/examples/mnist/train.py
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import cPickle
+import numpy as np
+import numpy.matlib
+import os
+import sys
+import gzip, numpy
+
+
+sys.path.append(os.path.join(os.path.dirname(__file__),
+                             '../../build/python'))
+sys.path.append(os.path.join(os.path.dirname(__file__),
+                             '../../build/lib'))
+sys.path.append(os.path.join(os.path.dirname(__file__),'../../build/src'))
+from singa import initializer
+from singa import utils
+from singa import optimizer
+from singa import device
+from singa import tensor
+from singa.proto import core_pb2
+
+
+
+def load_train_data(dir_path):
+    f = gzip.open(dir_path, 'rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    traindata = train_set[0].astype(np.float32)
+    validdata = valid_set[0].astype(np.float32)
+    return traindata, validdata
+
+
+
+def train(data_dir, num_epoch=10, batch_size=100):
+    print 'Start intialization............'
+    lr = 0.1   # Learning rate
+    weight_decay  = 0.0002
+    hdim = 1000
+    vdim = 784
+    opt = optimizer.SGD(momentum=0.8, weight_decay=weight_decay)
+    
+    shape = (vdim, hdim)
+    tweight = tensor.Tensor(shape)
+    initializer.gaussian(tweight, 0.0, 0.1)
+    tvbias = tensor.from_numpy(np.zeros(vdim, dtype = np.float32))
+    thbias = tensor.from_numpy(np.zeros(hdim, dtype = np.float32))
+    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)
+
+    print 'Loading data ..................'
+    train_x, valid_x = load_train_data(data_dir)
+
+    num_train_batch = train_x.shape[0]/batch_size
+    print "num_train_batch = \n", num_train_batch
+    for epoch in range(num_epoch):
+        trainerrorsum = 0.0
+        validerrorsum = 0.0
+        print 'Epoch %d' % epoch
+        for b in range(num_train_batch):
+            # positive phase
+            if b % 100 == 0:
+                print "batch: \n", b
+
+            tdata = tensor.from_numpy(train_x[ (b * batch_size): ((b + 1) * batch_size), : ])
+            tposhidprob = tensor.mult(tdata, tweight)
+            tposhidprob.add_row(thbias)
+            tposhidprob = tensor.sigmoid(tposhidprob)
+            tposhidrandom = tensor.Tensor(tposhidprob.shape)
+            initializer.uniform(tposhidrandom, 0.0, 1.0)
+            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)
+            
+            # negative phase
+            tnegdata = tensor.mult(tposhidsample, tweight.transpose())
+            tnegdata.add_row(tvbias)
+            tnegdata = tensor.sigmoid(tnegdata)
+
+            tneghidprob = tensor.mult(tnegdata, tweight)
+            tneghidprob.add_row(thbias) 
+            tneghidprob = tensor.sigmoid(tneghidprob)
+            trainerror = tensor.sum(tensor.eltwise_mult((tdata - tnegdata),(tdata - tnegdata)))
+            trainerrorsum = trainerror + trainerrorsum
+           
+            tgweight = tensor.mult(tnegdata.transpose(), tneghidprob) - tensor.mult(tdata.transpose(), tposhidprob)
+            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
+            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
+            
+            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, '')
+            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, '')
+            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, '')
+
+        info = 'train errorsum = %f' \
+            % (trainerrorsum)
+        print info
+
+        tvaliddata = tensor.from_numpy(valid_x[ :, : ])
+        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
+        tvalidposhidprob.add_row(thbias)
+        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
+        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape)
+        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
+        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
+
+        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.transpose())
+        tvalidnegdata.add_row(tvbias)
+        tvalidnegdata = tensor.sigmoid(tvalidnegdata)
+
+        validerrorsum = tensor.sum(tensor.eltwise_mult((tvaliddata - tvalidnegdata),(tvaliddata - tvalidnegdata)))
+        validinfo = 'valid errorsum = %f' \
+            % (validerrorsum)
+        print validinfo
+
+
+if __name__ == '__main__':
+    data_dir = 'mnist.pkl.gz'
+    assert os.path.exists(data_dir), \
+        'Pls download the mnist dataset'
+    train(data_dir)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 3420a0c..2075b5d 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -283,23 +283,42 @@ Tensor operator<(const Tensor &in, const SType x);
 template <typename SType>
 void LT(const Tensor &in, const SType x, Tensor *out);
 
+/// Element-wise operation, out[i]= (in1[i] < in2[i]) ? 1.f : 0.f
+Tensor operator<(const Tensor &in1, const Tensor& in2);
+void LT(const Tensor &in1, const Tensor& in2, Tensor *out);
+
 /// Element-wise operation, out[i]= (in[i] <= x) ? 1.f : 0.f
 template <typename SType>
 Tensor operator<=(const Tensor &in, const SType x);
 template <typename SType>
 void LE(const Tensor &in, const SType x, Tensor *out);
+
+/// Element-wise operation, out[i]= (in1[i] <= in2[i]) ? 1.f : 0.f
+Tensor operator<=(const Tensor &in1, const Tensor& in2);
+void LE(const Tensor &in1, const Tensor& in2, Tensor *out);
+
 /// Element-wise operation, out[i]= (in[i] > x) ? 1.f : 0.f
 template <typename SType>
 Tensor operator>(const Tensor &in, const SType x);
 template <typename SType>
 void GT(const Tensor &in, const SType x, Tensor *out);
 
+/// Element-wise operation, out[i]= (in1[i] > in2[i]) ? 1.f : 0.f
+Tensor operator>(const Tensor &in1, const Tensor& in2);
+void GT(const Tensor &in1, const Tensor& in2, Tensor *out);
+
+
 /// Element-wise operation, out[i]= (in[i] >= x) ? 1.f : 0.f
 template <typename SType>
 Tensor operator>=(const Tensor &in, const SType x);
 template <typename SType>
 void GE(const Tensor &in, const SType x, Tensor *out);
 
+/// Element-wise operation, out[i]= (in1[i] >= in2[i]) ? 1.f : 0.f
+Tensor operator>=(const Tensor &in1, const Tensor& in2);
+void GE(const Tensor &in1, const Tensor& in2, Tensor *out);
+
+
 Tensor operator+(const Tensor &lhs, const Tensor &rhs);
 void Add(const Tensor &lhs, const Tensor &rhs, Tensor *out);
 Tensor operator-(const Tensor &lhs, const Tensor &rhs);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu
index 13005af..e0112f3 100644
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@ -243,6 +243,14 @@ __global__ void KernelGE(const size_t num, const float *in, const float x,
     out[idx] = in[idx] >= x ? 1.0f : 0.0f;
   }
 }
+
+__global__ void KernelBGE(const size_t num, const float *in1, const float *in2,
+                         float *out) {
+  for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
+       idx += blockDim.x * gridDim.x) {
+    out[idx] = in1[idx] >= in2[idx] ? 1.0f : 0.0f;
+  }
+}
 __global__ void KernelGT(const size_t num, const float *in, const float x,
                          float *out) {
   for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
@@ -250,6 +258,13 @@ __global__ void KernelGT(const size_t num, const float *in, const float x,
     out[idx] = in[idx] > x ? 1.0f : 0.0f;
   }
 }
+__global__ void KernelBGT(const size_t num, const float *in1, const float *in2,
+                         float *out) {
+  for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
+       idx += blockDim.x * gridDim.x) {
+    out[idx] = in1[idx] > in2[idx] ? 1.0f : 0.0f;
+  }
+}
 __global__ void KernelLE(const size_t num, const float *in, const float x,
                          float *out) {
   for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
@@ -257,7 +272,13 @@ __global__ void KernelLE(const size_t num, const float *in, const float x,
     out[idx] = in[idx] <= x ? 1.0f : 0.0f;
   }
 }
-
+__global__ void KernelBLE(const size_t num, const float *in1, const float *in2,
+                         float *out) {
+  for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
+       idx += blockDim.x * gridDim.x) {
+    out[idx] = in1[idx] <= in2[idx] ? 1.0f : 0.0f;
+  }
+}
 __global__ void KernelLT(const size_t num, const float *in, const float x,
                          float *out) {
   for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
@@ -265,7 +286,13 @@ __global__ void KernelLT(const size_t num, const float *in, const float x,
     out[idx] = in[idx] < x ? 1.0f : 0.0f;
   }
 }
-
+__global__ void KernelBLT(const size_t num, const float *in1, const float *in2,
+                         float *out) {
+  for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
+       idx += blockDim.x * gridDim.x) {
+    out[idx] = in1[idx] < in2[idx] ? 1.0f : 0.0f;
+  }
+}
 __global__ void KernelRowMax(const size_t nrow, const size_t ncol, const float *inPtr,
     float *outPtr) {
   for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < nrow;
@@ -381,19 +408,34 @@ void gt(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s) {
   KernelGT <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
 }
+void gt(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s) {
+  KernelBGT <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in1, in2, out);
+}
 void ge(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s) {
   KernelGE <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
 }
+void ge(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s) {
+  KernelBGE <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in1, in2, out);
+}
 void lt(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s) {
   KernelLT <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
 }
+void lt(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s) {
+  KernelBLT <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in1, in2, out);
+}
 void le(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s) {
   KernelLE <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
 }
-
+void le(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s) {
+  KernelBLE <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in1, in2, out);
+}
 void pow(const size_t n, const float *in1, const float *in2, float *out,
          cudaStream_t s) {
   KernelPow <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h
index 63b0d82..202777e 100644
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@ -66,12 +66,24 @@ void threshold(const size_t n, const float x, const float *in, float *out,
 
 void gt(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s);
+void gt(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s);
+
 void ge(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s);
+void ge(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s);
+
+
 void lt(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s);
+void lt(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s);
+
 void le(const size_t num, const float *in, const float x, float *out,
         cudaStream_t s);
+void le(const size_t num, const float *in1, const float *in2, float *out,
+        cudaStream_t s);
 
 // 2 inputs
 void pow(const size_t n, const float *in1, const float *in2, float *out,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index dfb1eb2..b80e233 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -541,7 +541,10 @@ GenBinaryTensorFn(operator-, Sub);
 GenBinaryTensorFn(operator*, EltwiseMult);
 GenBinaryTensorFn(operator/, Div);
 GenBinaryTensorFn(Pow, Pow);
-
+GenBinaryTensorFn(operator<, LT);
+GenBinaryTensorFn(operator<=, LE);
+GenBinaryTensorFn(operator>, GT);
+GenBinaryTensorFn(operator>=, GE);
 #define EltwiseTensorScalarFn(fn, t, x, ret)                            \
   do {                                                                  \
     TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, {  \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index 1914ca6..bf913c0 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -127,6 +127,12 @@ void LE(const size_t num, const Block *in, const DType x, Block *out,
         Context *ctx) {
   LOG(FATAL) << "LE Not Implemented";
 }
+/// out[i]=(in1[i]<=in2[i])?1.f:0.f
+template <typename DType, typename Lang>
+void LE(const size_t num, const Block *in1, const Block *in2, Block *out,
+        Context *ctx) {
+  LOG(FATAL) << "Tensor-Tensor LE Not Implemented";
+}
 /// Natual logarithm, the base is e, Neper number out[i]=log(in[i]).
 template <typename DType, typename Lang>
 void Log(const size_t num, const Block *in, Block *out, Context *ctx) {
@@ -138,18 +144,36 @@ void LT(const size_t num, const Block *in, const DType x, Block *out,
         Context *ctx) {
   LOG(FATAL) << "LT Not Implemented";
 }
+/// out[i]=(in1[i]<in2[i])?1.f:0.f
+template <typename DType, typename Lang>
+void LT(const size_t num, const Block *in1, const Block *in2, Block *out,
+        Context *ctx) {
+  LOG(FATAL) << "Tensor-Tensor LT Not Implemented";
+}
 /// out[i]=(in[i]>=x)?1.f:0.f
 template <typename DType, typename Lang>
 void GE(const size_t num, const Block *in, const DType x, Block *out,
         Context *ctx) {
   LOG(FATAL) << "GE Not Implemented";
 }
+/// out[i]=(in1[i]>=in2[i])?1.f:0.f
+template <typename DType, typename Lang>
+void GE(const size_t num, const Block *in1, const Block *in2, Block *out,
+        Context *ctx) {
+  LOG(FATAL) << "Tensor-Tensor GE Not Implemented";
+}
 /// out[i]=(in[i]>x)?1.f:0.f
 template <typename DType, typename Lang>
 void GT(const size_t num, const Block *in, const DType x, Block *out,
         Context *ctx) {
   LOG(FATAL) << "GT Not Implemented";
 }
+/// out[i]=(in[i]>in2[i])?1.f:0.f
+template <typename DType, typename Lang>
+void GT(const size_t num, const Block *in, const Block *in2, Block *out,
+        Context *ctx) {
+  LOG(FATAL) << "Tensor-Tensor GT Not Implemented";
+}
 /// out[i] = pow(in[i], x)
 template <typename DType, typename Lang>
 void Pow(const size_t num, const Block *in, const DType x, Block *out,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index a2802d5..8c8a40a 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -142,6 +142,16 @@ void GE<float, lang::Cpp>(const size_t num, const Block *in, const float x,
 }
 
 template <>
+void GE<float, lang::Cpp>(const size_t num, const Block *in1, const Block *in2,
+                          Block *out, Context *ctx) {
+  float *outPtr = static_cast<float *>(out->mutable_data());
+  const float *inPtr1 = static_cast<const float *>(in1->data());
+  const float *inPtr2 = static_cast<const float *>(in2->data());
+  for (size_t i = 0; i < num; i++) {
+    outPtr[i] = (inPtr1[i] >= inPtr2[i]) ? 1.f : 0.f;
+  }
+}
+template <>
 void GT<float, lang::Cpp>(const size_t num, const Block *in, const float x,
                           Block *out, Context *ctx) {
   float *outPtr = static_cast<float *>(out->mutable_data());
@@ -151,6 +161,17 @@ void GT<float, lang::Cpp>(const size_t num, const Block *in, const float x,
   }
 }
 template <>
+void GT<float, lang::Cpp>(const size_t num, const Block *in1, const Block *in2,
+                          Block *out, Context *ctx) {
+  float *outPtr = static_cast<float *>(out->mutable_data());
+  const float *inPtr1 = static_cast<const float *>(in1->data());
+  const float *inPtr2 = static_cast<const float *>(in2->data());
+  for (size_t i = 0; i < num; i++) {
+    outPtr[i] = (inPtr1[i] > inPtr2[i]) ? 1.f : 0.f;
+  }
+}
+
+template <>
 void LE<float, lang::Cpp>(const size_t num, const Block *in, const float x,
                           Block *out, Context *ctx) {
   float *outPtr = static_cast<float *>(out->mutable_data());
@@ -160,6 +181,16 @@ void LE<float, lang::Cpp>(const size_t num, const Block *in, const float x,
   }
 }
 template <>
+void LE<float, lang::Cpp>(const size_t num, const Block *in1, const Block *in2,
+                          Block *out, Context *ctx) {
+  float *outPtr = static_cast<float *>(out->mutable_data());
+  const float *inPtr1 = static_cast<const float *>(in1->data());
+  const float *inPtr2 = static_cast<const float *>(in2->data());
+  for (size_t i = 0; i < num; i++) {
+    outPtr[i] = (inPtr1[i] <= inPtr2[i]) ? 1.f : 0.f;
+  }
+}
+template <>
 void Log<float, lang::Cpp>(const size_t num, const Block *in, Block *out,
                            Context *ctx) {
   float *outPtr = static_cast<float *>(out->mutable_data());
@@ -179,6 +210,17 @@ void LT<float, lang::Cpp>(const size_t num, const Block *in, const float x,
   }
 }
 template <>
+void LT<float, lang::Cpp>(const size_t num, const Block *in1, const Block *in2,
+                          Block *out, Context *ctx) {
+  float *outPtr = static_cast<float *>(out->mutable_data());
+  const float *inPtr1 = static_cast<const float *>(in1->data());
+  const float *inPtr2 = static_cast<const float *>(in2->data());
+  for (size_t i = 0; i < num; i++) {
+    outPtr[i] = (inPtr1[i] < inPtr2[i]) ? 1.f : 0.f;
+  }
+}
+
+template <>
 void Pow<float, lang::Cpp>(const size_t num, const Block *in, const float x,
                            Block *out, Context *ctx) {
   float *outPtr = static_cast<float *>(out->mutable_data());

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 8b6e939..1cd61b3 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -117,6 +117,15 @@ void GE<float, lang::Cuda>(const size_t num, const Block* in, const float x,
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::ge(num, inPtr, x, outPtr, ctx->stream);
 }
+template <>
+void GE<float, lang::Cuda>(const size_t num, const Block* in1, const Block* in2,
+                           Block* out, Context* ctx) {
+  float* outPtr = static_cast<float*>(out->mutable_data());
+  const float* inPtr1 = static_cast<const float*>(in1->data());
+  const float* inPtr2 = static_cast<const float*>(in2->data());
+  cuda::ge(num, inPtr1, inPtr2, outPtr, ctx->stream);
+}
+
 
 template <>
 void GT<float, lang::Cuda>(const size_t num, const Block* in, const float x,
@@ -125,7 +134,14 @@ void GT<float, lang::Cuda>(const size_t num, const Block* in, const float x,
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::gt(num, inPtr, x, outPtr, ctx->stream);
 }
-
+template <>
+void GT<float, lang::Cuda>(const size_t num, const Block* in1, const Block* in2,
+                           Block* out, Context* ctx) {
+  float* outPtr = static_cast<float*>(out->mutable_data());
+  const float* inPtr1 = static_cast<const float*>(in1->data());
+  const float* inPtr2 = static_cast<const float*>(in2->data());
+  cuda::gt(num, inPtr1, inPtr2, outPtr, ctx->stream);
+}
 template <>
 void LE<float, lang::Cuda>(const size_t num, const Block* in, const float x,
                            Block* out, Context* ctx) {
@@ -133,6 +149,14 @@ void LE<float, lang::Cuda>(const size_t num, const Block* in, const float x,
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::le(num, inPtr, x, outPtr, ctx->stream);
 }
+template <>
+void LE<float, lang::Cuda>(const size_t num, const Block* in1, const Block* in2,
+                           Block* out, Context* ctx) {
+  float* outPtr = static_cast<float*>(out->mutable_data());
+  const float* inPtr1 = static_cast<const float*>(in1->data());
+  const float* inPtr2 = static_cast<const float*>(in2->data());
+  cuda::le(num, inPtr1, inPtr2, outPtr, ctx->stream);
+}
 
 /// Natual logarithm, the base is e, Neper number out[i]=ln(in[i]).
 template <>
@@ -149,7 +173,14 @@ void LT<float, lang::Cuda>(const size_t num, const Block* in, const float x,
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::lt(num, inPtr, x, outPtr, ctx->stream);
 }
-
+template <>
+void LT<float, lang::Cuda>(const size_t num, const Block* in1, const Block* in2,
+                           Block* out, Context* ctx) {
+  float* outPtr = static_cast<float*>(out->mutable_data());
+  const float* inPtr1 = static_cast<const float*>(in1->data());
+  const float* inPtr2 = static_cast<const float*>(in2->data());
+  cuda::lt(num, inPtr1, inPtr2, outPtr, ctx->stream);
+}
 /// Element-wise operation, out[i] = in[i]^x
 template <>
 void Pow<float, lang::Cuda>(const size_t num, const Block* in, const float x,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 503527f..7cab746 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -102,10 +102,12 @@ class Optimizer(object):
             name (str): parameter name
             specs (ParamSpec): protobuf obj
         """
+	assert type(specs) == model_pb2.ParamSpec, \
+		'specs should be model_pb2.ParamSpec instance'
         if specs.HasField('regularizer'):
-            self.regularizers[name] = CppRegularizer(specs.constraint)
+            self.regularizers[name] = CppRegularizer(specs.regularizer)
         if specs.HasField('constraint'):
-            self.constraints[name] = CppConstraint(specs.regularizer)
+            self.constraints[name] = CppConstraint(specs.constraint)
         if specs.lr_mult != 1:
             self.learning_rate_multiplier[name] = specs.lr_mult
         if specs.decay_mult != 1:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index 6e84a4f..ed651e9 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -238,16 +238,28 @@ class Tensor(object):
                                     self.singa_tensor, rhs)
 
     def __lt__(self, rhs):
-        return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs)
+        if isinstance(rhs, Tensor):
+            return _call_singa_func(singa.LT_TT, self.singa_tensor, rhs.singa_tensor)
+        else:
+            return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs)
 
     def __le__(self, rhs):
-        return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs)
+        if isinstance(rhs, Tensor):
+            return _call_singa_func(singa.LE_TT, self.singa_tensor, rhs.singa_tensor)
+        else:
+            return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs)
 
     def __gt__(self, rhs):
-        return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs)
+        if isinstance(rhs, Tensor):
+            return _call_singa_func(singa.GT_TT, self.singa_tensor, rhs.singa_tensor)
+        else:
+            return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs)
 
     def __ge__(self, rhs):
-        return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs)
+        if isinstance(rhs, Tensor):
+            return _call_singa_func(singa.GE_TT, self.singa_tensor, rhs.singa_tensor)
+        else:
+            return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs)
 
 
 ''' python functions for global functions in Tensor.h

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e1a524d1/src/python/swig/core_tensor.i
----------------------------------------------------------------------
diff --git a/src/python/swig/core_tensor.i b/src/python/swig/core_tensor.i
index c4ee610..60f8b45 100644
--- a/src/python/swig/core_tensor.i
+++ b/src/python/swig/core_tensor.i
@@ -207,6 +207,16 @@ namespace singa{
   %rename(LE_Tf) operator<=(const Tensor &t, const float x);
   %rename(GT_Tf) operator>(const Tensor &t, const float x);
   %rename(GE_Tf) operator>=(const Tensor &t, const float x);
+  %rename(LT_TT) operator<(const Tensor &lhs, const Tensor &rhs);
+  %rename(LE_TT) operator<=(const Tensor &lhs, const Tensor &rhs);
+  %rename(GT_TT) operator>(const Tensor &lhs, const Tensor &rhs);
+  %rename(GE_TT) operator>=(const Tensor &lhs, const Tensor &rhs);
+
+  Tensor operator<(const Tensor &lhs, const Tensor &rhs);
+  Tensor operator<=(const Tensor &lhs, const Tensor &rhs);
+  Tensor operator>(const Tensor &lhs, const Tensor &rhs);
+  Tensor operator>=(const Tensor &lhs, const Tensor &rhs);
+
 
   template <typename DType>
   Tensor operator<(const Tensor &t, const DType x);

[34/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

update the layer identifier. if the implementation is transparent to devices,
then it has an extra identifier 'singa' besides the speicific identifiers,
i.e., 'singacpp', 'singacl' and 'singacuda'


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c2173b30
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c2173b30
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c2173b30

Branch: refs/heads/master
Commit: c2173b3097a6f38ff2a44f48cc250219ad41b8d4
Parents: 5d20d35
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 20:46:24 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:04:32 2016 +0800

----------------------------------------------------------------------
 src/model/layer/convolution.cc |  1 -
 src/model/layer/pooling.cc     |  1 -
 src/python/singa/layer.py      | 16 ++++++++++------
 3 files changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c2173b30/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 0d1751d..52e9d93 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -23,7 +23,6 @@
 namespace singa {
 using std::vector;
 
-RegisterLayerClass(singa_convolution, Convolution);
 RegisterLayerClass(singacpp_convolution, Convolution);
 void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c2173b30/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index 23969da..a18f9de 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -20,7 +20,6 @@
 #include "singa/model/layer.h"
 namespace singa {
 
-RegisterLayerClass(singa_pooling, Pooling);
 RegisterLayerClass(singacpp_pooling, Pooling);
 void Pooling::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c2173b30/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index a9f3826..86ba836 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -56,7 +56,8 @@ For example, CudnnConvolution layer is identified by 'cudnn_convolution';
 Some layers' implementation use only Tensor functions, thererfore they are
 transparent to the underlying devices. For threse layers, they would have
 multiple identifiers, e.g., singacpp_dropout, singacuda_dropout and
-singacl_dropout are all for the Dropout layer.
+singacl_dropout are all for the Dropout layer. In addition, it has an extra
+identifier 'singa', i.e. 'singa_dropout' also stands for the Dropout layer.
 
 engine is case insensitive. Each python layer would create the correct specific
 layer using the engine attribute.
@@ -439,7 +440,8 @@ class BatchNormalization(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
         self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
         self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                               'singacl'])
         self.layer = _create_layer(engine, 'BatchNorm')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -466,7 +468,8 @@ class LRN(Layer):
         # TODO(wangwei) enable mode = 'within_channel'
         assert mode == 'cross_channel', 'only support mode="across_channel"'
         conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                               'singacl'])
         self.layer = _create_layer(engine, 'LRN')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -555,7 +558,8 @@ class Dropout(Layer):
         # 'cudnn' works for v>=5.0
         #  if engine.lower() == 'cudnn':
         #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                               'singacl'])
         self.layer = _create_layer(engine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -590,7 +594,8 @@ class Softmax(Layer):
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacl', 'singacuda'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacl',
+                               'singacuda'])
         self.layer = _create_layer(engine, 'Softmax')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -820,7 +825,6 @@ def _construct_param_specs_from_dict(specs):
         a ParamSpec object
     """
     conf = model_pb2.ParamSpec()
-    print 'convert', specs
     if 'name' in specs:
         conf.name = specs['name']
     if 'lr_mult' in specs:

[21/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Added readme file for the cifar-10 examples.
Updated the uniform and gaussian methods in initializer.py to include the
fan_in and fan_out arguments.
Reformat some python files.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/cdd718ed
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/cdd718ed
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/cdd718ed

Branch: refs/heads/master
Commit: cdd718ed946acfd829ccfd6e5b43999f990fd634
Parents: 33992c9
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sun Aug 14 21:41:16 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 23:47:24 2016 +0800

----------------------------------------------------------------------
 doc/docs/examples.rst           |   6 --
 doc/docs/index.rst              |   2 +-
 doc/docs/initializer.rst        |   2 +-
 examples/char-rnn/README.md     |   2 +-
 examples/char-rnn/train.py      | 103 +++++++++++++++++++++--------------
 examples/cifar10/alexnet.py     |  48 +++++++++++++---
 examples/cifar10/predict.py     |  10 ++--
 examples/cifar10/vgg.py         |  10 +---
 examples/index.rst              |   4 ++
 src/python/singa/initializer.py |  85 ++++++++++++++---------------
 src/python/singa/optimizer.py   |   4 +-
 11 files changed, 156 insertions(+), 120 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
deleted file mode 100644
index b0b2af8..0000000
--- a/doc/docs/examples.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Examples
-========
-
-.. toctree::
-
-   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 2294054..11f0ebb 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -12,4 +12,4 @@ English
    loss
    metric
    optimizer
-   examples
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
index a190702..f334497 100644
--- a/doc/docs/initializer.rst
+++ b/doc/docs/initializer.rst
@@ -5,7 +5,7 @@ Python API
 ----------
 
 .. automodule:: singa.initializer
-   :members:
+   :members: uniform, gaussian
    :member-order: bysource
 
 CPP API

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/char-rnn/README.md
----------------------------------------------------------------------
diff --git a/examples/char-rnn/README.md b/examples/char-rnn/README.md
index d4cfa30..f6e5edc 100644
--- a/examples/char-rnn/README.md
+++ b/examples/char-rnn/README.md
@@ -1,4 +1,4 @@
-# Train Char-RNN using SINGA
+# Train Char-RNN over plain text
 
 Recurrent neural networks (RNN) are widely used for modelling sequential data,
 e.g., natural language sentences. This example describes how to implement a RNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index fb5e71f..1273a57 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -19,8 +19,6 @@ The model is created following https://github.com/karpathy/char-rnn
 The train file could be any text file,
 e.g., http://cs.stanford.edu/people/karpathy/char-rnn/
 '''
-import sys
-import os
 import cPickle as pickle
 import numpy as np
 import argparse
@@ -32,12 +30,12 @@ from singa import device
 from singa import tensor
 from singa import optimizer
 from singa import initializer
-from singa.proto import core_pb2
 from singa.proto import model_pb2
 from singa import utils
 
 
 class Data(object):
+
     def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):
         '''Data object for loading a plain text file.
 
@@ -49,8 +47,8 @@ class Data(object):
         self.raw_data = open(fpath, 'r').read()  # read text file
         chars = list(set(self.raw_data))
         self.vocab_size = len(chars)
-        self.char_to_idx = {ch:i for i, ch in enumerate(chars)}
-        self.idx_to_char = {i:ch for i, ch in enumerate(chars)}
+        self.char_to_idx = {ch: i for i, ch in enumerate(chars)}
+        self.idx_to_char = {i: ch for i, ch in enumerate(chars)}
         data = [self.char_to_idx[c] for c in self.raw_data]
         # seq_length + 1 for the data + label
         nsamples = len(data) / (1 + seq_length)
@@ -69,10 +67,10 @@ class Data(object):
 
 def numpy2tensors(npx, npy, dev):
     '''batch, seq, dim -- > seq, batch, dim'''
-    tmpx=np.swapaxes(npx, 0, 1)
-    tmpy=np.swapaxes(npy, 0, 1)
-    inputs=[]
-    labels=[]
+    tmpx = np.swapaxes(npx, 0, 1)
+    tmpy = np.swapaxes(npy, 0, 1)
+    inputs = []
+    labels = []
     for t in range(tmpx.shape[0]):
         x = tensor.from_numpy(tmpx[t])
         y = tensor.from_numpy(tmpy[t])
@@ -99,25 +97,36 @@ def get_lr(epoch):
     return 0.001 / float(1 << (epoch / 50))
 
 
-def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
-        num_stacks=1, lr=0.001, dropout = 0.5, model_path='model.bin'):
+def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
+          num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'):
     # SGD with L2 gradient normalization
     opt = optimizer.SGD(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
-    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size, num_stacks=num_stacks,
-            dropout=dropout, input_sample_shape=(data.vocab_size,))
+    rnn = layer.LSTM(
+        name='lstm',
+        hidden_size=hidden_size,
+        num_stacks=num_stacks,
+        dropout=dropout,
+        input_sample_shape=(
+            data.vocab_size,
+        ))
     rnn.to_device(cuda)
     print 'created rnn'
     rnn_w = rnn.param_values()[0]
-    initializer.uniform(rnn_w, -0.08, 0.08)  # init all rnn parameters
+    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
     print 'rnn weight l1 = %f' % (rnn_w.l1())
-    dense = layer.Dense('dense', data.vocab_size, input_sample_shape=(hidden_size,))
+    dense = layer.Dense(
+        'dense',
+        data.vocab_size,
+        input_sample_shape=(
+            hidden_size,
+        ))
     dense.to_device(cuda)
     dense_w = dense.param_values()[0]
     dense_b = dense.param_values()[1]
     print 'dense w ', dense_w.shape
     print 'dense b ', dense_b.shape
-    initializer.xavier(dense_w) # init weight matrix using Xavier
+    initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
     dense_b.set_value(0.0)
     print 'dense b l1 = %f' % (dense_b.l1())
@@ -125,18 +134,18 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)
     g_dense_b = tensor.Tensor(dense_b.shape, cuda)
 
-    lossfun = loss.SoftmaxCrossEntropy();
+    lossfun = loss.SoftmaxCrossEntropy()
     for epoch in range(max_epoch):
         train_loss = 0
         for b in range(data.num_train_batch):
             batch = data.train_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
 
             outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
-            grads=[]
+            grads = []
             batch_loss = 0
             g_dense_w.set_value(0.0)
             g_dense_b.set_value(0.0)
@@ -149,52 +158,62 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
                 grads.append(grad)
                 g_dense_w += gwb[0]
                 g_dense_b += gwb[1]
-                #print output.l1(), act.l1()
-            utils.update_progress(b * 1.0 / data.num_train_batch,
-                    'training loss = %f' % (batch_loss / seq_length))
+                # print output.l1(), act.l1()
+            utils.update_progress(
+                b * 1.0 / data.num_train_batch, 'training loss = %f' %
+                (batch_loss / seq_length))
             train_loss += batch_loss
 
             grads.append(tensor.Tensor())
             grads.append(tensor.Tensor())
-            g_rnn_w=rnn.backward(model_pb2.kTrain, grads)[1][0]
+            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
             dense_w, dense_b = dense.param_values()
             opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')
-        print '\nEpoch %d, train loss is %f' % (epoch,
-                train_loss / data.num_train_batch / seq_length)
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_w, dense_w, 'dense_w')
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_b, dense_b, 'dense_b')
+        print '\nEpoch %d, train loss is %f' % \
+            (epoch, train_loss / data.num_train_batch / seq_length)
+
         eval_loss = 0
         for b in range(data.num_test_batch):
             batch = data.val_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
             outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
             for output, label in zip(outputs, labels):
                 output = dense.forward(model_pb2.kEval, output)
-                eval_loss += lossfun.forward(model_pb2.kEval, output, label).l1()
-        print 'Epoch %d, evaluation loss is %f' % (epoch,
-                eval_loss / data.num_test_batch / seq_length)
+                eval_loss += lossfun.forward(model_pb2.kEval,
+                                             output, label).l1()
+        print 'Epoch %d, evaluation loss is %f' % \
+            (epoch, eval_loss / data.num_test_batch / seq_length)
 
     # checkpoint the file model
     with open(model_path, 'wb') as fd:
         print 'saving model to %s' % model_path
-        d={}
-        for name, w in zip(['rnn_w', 'dense_w', 'dense_b'], [rnn_w, dense_w, dense_b]):
+        d = {}
+        for name, w in zip(
+                ['rnn_w', 'dense_w', 'dense_b'],
+                [rnn_w, dense_w, dense_b]):
             w.to_host()
-            d[name]=tensor.to_numpy(w)
-        d['idx_to_char']=data.idx_to_char
-        d['char_to_idx']=data.char_to_idx
-        d['hidden_size']=hidden_size
-        d['num_stacks']=num_stacks
-        d['dropout']=dropout
+            d[name] = tensor.to_numpy(w)
+        d['idx_to_char'] = data.idx_to_char
+        d['char_to_idx'] = data.char_to_idx
+        d['hidden_size'] = hidden_size
+        d['num_stacks'] = num_stacks
+        d['dropout'] = dropout
 
         pickle.dump(d, fd)
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Train multi-stack LSTM for '\
-            'modeling  character sequence from plain text files')
+    parser = argparse.ArgumentParser(
+        description='Train multi-stack LSTM for '
+        'modeling  character sequence from plain text files')
     parser.add_argument('data', type=str, help='training file')
     parser.add_argument('-b', type=int, default=32, help='batch_size')
     parser.add_argument('-l', type=int, default=64, help='sequence length')
@@ -204,4 +223,4 @@ if __name__ == '__main__':
     args = parser.parse_args()
     data = Data(args.data, batch_size=args.b, seq_length=args.l)
     train(data, args.m,  hidden_size=args.d, num_stacks=args.s,
-            seq_length=args.l, batch_size=args.b)
+          seq_length=args.l, batch_size=args.b)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..34da95d 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,12 +20,8 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
 validation accuracy would be about 82%.
 '''
 
-import sys
-import os
-
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
-from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
@@ -40,23 +36,57 @@ def create_net(use_cpu=False):
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
     W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
     b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
-    net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
+    net.add(
+        layer.Conv2D(
+            'conv1',
+            32,
+            5,
+            1,
+            W_specs=W0_specs.copy(),
+            b_specs=b_specs.copy(),
+            pad=2,
+            input_sample_shape=(
+                3,
+                32,
+                32,
+            )))
     net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
     net.add(layer.Activation('relu1'))
     net.add(layer.LRN(name='lrn1'))
-    net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv2',
+            32,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu2'))
     net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
     net.add(layer.LRN('lrn2'))
-    net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv3',
+            64,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu3'))
     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+    net.add(
+        layer.Dense(
+            'dense',
+            10,
+            W_specs=W2_specs.copy(),
+         b_specs=b_specs.copy()))
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         filler = specs.filler
         if filler.type == 'gaussian':
-            initializer.gaussian(p, filler.mean, filler.std)
+            p.gaussian(filler.mean, filler.std)
         else:
             p.set_value(0)
         print specs.name, filler.type, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index 8a9ea4e..307a610 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -16,28 +16,26 @@
 # =============================================================================
 import cPickle as pickle
 import numpy as np
-import sys
-import os
 
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import device
 from singa import tensor
 import net as ffnet
 
 
-def predict(net, images, cuda, topk=5):
+def predict(net, images, dev, topk=5):
     '''Predict the label of each image.
 
     Args:
         net, a pretrained neural net
         images, a batch of images [batch_size, 3, 32, 32], which have been
             pre-processed
-        cuda, the cuda device
+        dev, the training device
         topk, return the topk labels for each image.
     '''
     x = tensor.from_numpy(images.astype(np.float32))
-    x.to_device(cuda)
+    x.to_device(dev)
     y = net.predict(x)
     y.to_host()
     y = tensor.to_numpy(y)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 35ef00d..e8e3602 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -20,11 +20,7 @@ The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
 
-import sys
-import os
-import math
-
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import layer
 from singa import initializer
@@ -88,9 +84,9 @@ def create_net(use_cpu=False):
             initializer.uniform(p, 0, 1)
         elif len(p.shape) > 1:
             if 'conv' in name:
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+                p.gaussian(0, 0, 3 * 3 * p.shape[0])
             else:
-                initializer.gaussian(p, 0, 0.02)
+                p.gaussian(0, 0.02)
         else:
             p.set_value(0)
         print name, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index d6faf5d..4bb5b49 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,5 +1,9 @@
+Examples
+========
+
 .. toctree::
 
+   cifar10/README
    char-rnn/README
    imagenet/README
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 277fd2f..fb99663 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -23,77 +23,68 @@ Example usages::
     from singa import initializer
 
     x = tensor.Tensor((3, 5))
-    initializer.xavier(x)
+    initializer.uniform(x, 3, 5) # use both fan_in and fan_out
+    initializer.uniform(x, 3, 0)  # use only fan_in
 '''
 
 import math
 
 
-'''
-TODO(wangwei) update the uniform and gaussian initializers
-
 def uniform(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
-    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    '''Initialize the values of the input tensor following a uniform
+    distribution with specific bounds.
+
+    Args:
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
+
+    Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
     training deep feedforward neuralnetworks.
 
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+        avg = 1
+    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
     t.uniform(-x, x)
 
 
 def gaussian(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
+    '''Initialize the values of the input tensor following a Gaussian
+    distribution with specific std.
+
+    Args:
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
 
     Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
     Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
-
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+        avg = 1
+    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
     t.gaussian(0, std)
-'''
-
-
-def uniform(t, low=0, high=1):
-    '''Initialize the parameter values following an Uniform distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        low (float): lower bound
-        high (float): higher bound
-    '''
-    t.uniform(low, high)
-
-
-def gaussian(t, mean=0, std=0.01):
-    '''Initialize the parameter values following an Gaussian distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        mean (float): mean of the distribution
-        std (float): standard variance
-    '''
-    t.gaussian(mean, std)
 
 
 def xavier(t):
     '''Initialize the matrix parameter follow a Uniform distribution from
     [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
 
+    Deprecated. Please use uniform()
+
     Args:
         t (Tensor): the parater tensor
     '''
@@ -106,6 +97,8 @@ def glorot(t):
     '''Initialize the matrix parameter follow a Gaussian distribution with
     mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
 
+    Deprecated. Please use gaussian()
+
     Args:
         t (Tensor): the parater tensor
     '''
@@ -118,6 +111,8 @@ def msra(t):
     '''Initialize the matrix parameter follow a Guassian distribution with
     mean = 0, std = math.sqrt(2.0 / nb_row).
 
+    Deprecated. Please use gaussian()
+
     Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
     nonlinearities.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index a964f16..338c6b0 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -44,8 +44,8 @@ class Optimizer(object):
 
     1. construct the optimizer
     2. (optional) register each parameter with its specs.
-    3. use the optimizer to update parameter values given parameter
-        gradients and other optional info
+    3. use the optimizer to update parameter values given parameter gradients
+       and other optional info
 
     The subclasses should override the apply_with_lr function to do the real
     parameter udpate.

[12/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index 6e84a4f..2e60554 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -16,23 +16,63 @@
 # under the License.
 # =============================================================================
 """
-This script includes Tensor class and its methods for python users
-to call singa::Tensor and its methods
+Example usage::
+
+    from singa import tensor
+    from singa import device
+
+    # create a tensor with shape (2,3), default CppCPU device and float32
+    x = tensor.Tensor((2,3))
+    x.set_value(0.4)
+
+    # create a tensor from a numpy array
+    y = tensor.from_numpy((3,3), dtype=np.float32)
+    y.uniform(-1, 1)
+
+    z = mult(x, y)  # gemm -> z of shape (2, 3)
+
+    x += z # element-wise addition
+
+    dev = device.create_cuda_gpu()
+    x.to_device(dev)  # move the data to a gpu device
+
+    r = relu(x)
+
+    r.to_host()  # move the data back to host cpu
+    s = r.to_numpy()  # tensor -> numpy array, r must be on cpu
+
+
+There are two set of tensor functions,
+
+Tensor member functions
+    which would change the internal state of the Tensor instance.
+Tensor module functions
+    which accept Tensor instances as arguments and return Tensor instances.
+
+Every Tesor instance must be initialized before reading data from it.
 """
 
 import numpy as np
+from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-from functools import reduce
+import device
 
 
 class Tensor(object):
-    ''' Class and member functions for singa::Tensor
+    '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor
+
+    The three arguments are three attributes of the Tensor.
+
+    Args:
+        shape (list<int>): a list of integers for the tensor shape. If shape is
+            not specified, the created tensor is called a dummy tensor.
+        device: a swig converted Device instance using the device moduel . If it
+            is None, then the default host device would be used.
+        dtype: data type. currently, most operations only accept kFloat32.
     '''
 
     def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
-        ''' shape = (tuple)
-        '''
         if shape is None:
             # call constructor of singa::Tensor
             self.singa_tensor = singa.Tensor()
@@ -48,125 +88,230 @@ class Tensor(object):
             self.device = device
             self.dtype = dtype
 
-    def copy_from_numpy(self, np_array, offset=0):
-        ''' this method stores the values of numpy array into tensor data
-            from the position of offset
-        '''
-        assert np_array.size == self.size(), 'tensor shape should be the same'
-        if not np_array.ndim == 1:
-            np_array = np_array.flatten()
-        dt = np_array.dtype
-        if dt == np.float32:
-            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
-        elif dt == np.int or dt == np.int32:
-            self.singa_tensor.intCopyDataFromHostPtr(np_array)
-        else:
-            print 'Not implemented yet for ', dt
-
-    # deprecated, access the member data_type directly
-    def data_type(self):
-        return self.singa_tensor.data_type()
-
-    # deprecated, access the member shape directly
-    def shape(self, axis=None):
-        if axis is None:
-            return self.singa_tensor.shape()
-        else:
-            return self.singa_tensor.shape(axis)
-
     def ndim(self):
+        '''
+        Returns:
+            the number of dimensions of the tensor.
+        '''
         return self.singa_tensor.nDim()
 
-    def is_transpose(self):  # TODO(wangwei) make transpose a member
+    def is_transpose(self):
+        '''
+        Returns:
+            True if the internal data is transposed; otherwise False.
+        '''
         return self.singa_tensor.transpose()
 
     def size(self):  # TODO(wangwei) compute size
+        '''
+        Returns:
+            the number of elements of the tensor.
+        '''
         return self.singa_tensor.Size()
 
     def memsize(self):
+        '''
+        Returns:
+            the number of Bytes allocated for this tensor.
+        '''
         return self.singa_tensor.MemSize()
 
     def reshape(self, shape):
+        '''Change the tensor shape.
+
+        Args:
+            shape (list<int>): new shape, which should have the same volumn as
+                the original shape.
+        '''
         assert product(self.shape) == product(shape), \
-               'product of shape should be equal'
+            'product of shape should be equal'
         self.shape = shape
-        self.singa_tensor.Reshape(_tuple_to_vector(shape))
+        self.singa_tensor.Reshape(list(shape))
 
     def reset_like(self, t):
+        '''Reset the shape, dtype and device as the given tensor.
+
+        Args:
+            t (Tensor)
+        '''
         self.singa_tensor.ResetLike(t.singa_tensor)
 
+    '''
     def as_type(self, dtype):
+        Change the data type.
+
+        Args:
+            dtype:
         self.singa_tensor.AsType(dtype)
+    '''
 
     def to_device(self, device):
+        '''Move the tensor data onto a given device.
+
+        Args:
+            device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
+        '''
         self.singa_tensor.ToDevice(device)
 
     def to_host(self):
+        '''Move the tensor data onto the default host CppCPU device.
+        '''
         self.singa_tensor.ToHost()
 
     def l2(self):
+        '''
+        Returns:
+            the L2 norm.
+        '''
         return self.singa_tensor.L2()
 
     def l1(self):
+        '''
+        Returns:
+            the L1 norm.
+        '''
         return self.singa_tensor.L1()
 
     def set_value(self, x):
+        '''Set all elements of the tensor to be the give value.
+
+        Args:
+            x (float), a float value to be set to all elements.
+        '''
         # assert type(x) == float, 'set value only accepts float input'
         # if isinstance(x, float):
         self.singa_tensor.floatSetValue(x)
 
+    def copy_from_numpy(self, np_array, offset=0):
+        ''' Copy the data from the numpy array.
+
+        Args:
+            np_array: source numpy array
+            offset (int): destination offset
+        '''
+        assert np_array.size == self.size(), 'tensor shape should be the same'
+        if not np_array.ndim == 1:
+            np_array = np_array.flatten()
+        dt = np_array.dtype
+        if dt == np.float32:
+            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
+        elif dt == np.int or dt == np.int32:
+            self.singa_tensor.intCopyDataFromHostPtr(np_array)
+        else:
+            print 'Not implemented yet for ', dt
+
     def copy_data(self, t):
+        '''Copy data from other Tensor instance.
+
+        Args:
+            t (Tensor): source Tensor.
+        '''
+        assert type(t) == Tensor, 't must be a singa Tensor instance'
         self.singa_tensor.CopyData(t.singa_tensor)
 
     def clone(self):
-        ''' it does deep copy
-            call singa::Tensor::Clone()
+        '''
+        Returns:
+            a new Tensor which does deep copy of this tensor
         '''
         return _call_singa_func(self.singa_tensor.Clone)
 
-    def transpose(self):
-        ''' shallow copy, negate the transpose field
-            call singa::Tensor::T()
+    def T(self):
+        ''' shallow copy, negate the transpose field.
+
+        Returns:
+            a new Tensor which shares the underlying data memory (shallow copy)
+            but is marked as a transposed version of this tensor.
         '''
         return _call_singa_func(self.singa_tensor.T)
 
+    '''
     def copy(self):
-        ''' shallow copy
+        shallow copy
             call copy constructor of singa::Tensor
-        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
+    '''
 
     def deepcopy(self):
-        ''' deep copy
-            call singa::Tensor::Clone()
+        '''Same as clone().
+
+        Returns:
+            a new Tensor
         '''
         return self.clone()
 
     def bernoulli(self, p):
+        '''Sample 0/1 for each element according to the given probability.
+
+        Args:
+            p (float): with probability p, each element is sample to 1.
+        '''
         singa.floatBernoulli(float(p), self.singa_tensor)
 
     def gaussian(self, mean, std):
+        '''Generate a value for each element following a Gaussian distribution.
+
+        Args:
+            mean (float): mean of the distribution
+            std (float): standard variance of the distribution
+        '''
         singa.floatGaussian(float(mean), float(std), self.singa_tensor)
 
     def uniform(self, low, high):
+        '''Generate a value for each element following a uniform distribution.
+
+        Args:
+            low (float): the lower bound
+            high (float): the hight bound
+        '''
         singa.floatUniform(float(low), float(high), self.singa_tensor)
 
     def add_column(self, v):
+        '''Add a tensor to each column of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a column to this tensor.
+        '''
         singa.AddColumn(v.singa_tensor, self.singa_tensor)
 
     def add_row(self, v):
+        '''Add a tensor to each row of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a row to this tensor.
+        '''
         singa.AddRow(v.singa_tensor, self.singa_tensor)
 
     def div_column(self, v):
+        '''Divide each column of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the column of self.
+        '''
         singa.DivColumn(v.singa_tensor, self.singa_tensor)
 
     def div_row(self, v):
+        '''Divide each row of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the row of self.
+        '''
         singa.DivRow(v.singa_tensor, self.singa_tensor)
 
     def mult_column(self, v):
+        '''Multiply each column of this tensor by v element-wisely.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the column of self.
+        '''
         singa.MultColumn(v.singa_tensor, self.singa_tensor)
 
     def mult_row(self, v):
+        '''Multiply each row of this tensor by v element-wisely.
+
+        Args:
+            v (Tensor): 1d tensor of the same length the row of self.
+        '''
         singa.MultRow(v.singa_tensor, self.singa_tensor)
 
     '''
@@ -174,6 +319,11 @@ class Tensor(object):
     '''
 
     def __iadd__(self, x):
+        ''' inplace element-wise addition with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor += x.singa_tensor
         else:
@@ -181,6 +331,12 @@ class Tensor(object):
         return self
 
     def __isub__(self, x):
+        ''' inplace element-wise subtraction with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
+
         if isinstance(x, Tensor):
             self.singa_tensor -= x.singa_tensor
         else:
@@ -188,6 +344,11 @@ class Tensor(object):
         return self
 
     def __imul__(self, x):
+        ''' inplace element-wise multiplication with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor *= x.singa_tensor
         else:
@@ -195,6 +356,11 @@ class Tensor(object):
         return self
 
     def __idiv__(self, x):
+        ''' inplace element-wise division by a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
@@ -272,29 +438,72 @@ def product(shape):
 
 
 def sizeof(dtype):
+    '''
+    Returns:
+        the number of bytes of the given SINGA data type defined in core.proto
+    '''
     return singa.SizeOf(dtype)
 
 
 def reshape(t, s):
+    '''Reshape the input tensor with the given shape.
+
+    Args:
+        t (Tensor): the tensor to be changed
+        s (list<int>): the new shape, which should have the same volumn as the
+            old shape.
+
+    Returns:
+        the new Tensor
+    '''
     return _call_singa_func(singa.Reshape, t.singa_tensor, s)
 
 
 def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
+    '''Copy the data between two Tensor instances which could be on different
+    devices.
+
+    Args:
+        dst (Tensor): destination Tensor
+        src (Tensor): source Tensor
+        size (int) : number of elements to copy
+        dst_offset (int): offset in terms of elements to the start of dst
+        src_offset (int): offset in terms of elements to the start of src
+    '''
     singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size,
                          dst_offset, src_offset)
 
 
 def from_numpy(np_array):
+    '''Create a Tensor instance with the shape, dtype and values from the numpy
+    array.
+
+    Args:
+        np_array: the numpy array.
+
+    Returns:
+        A Tensor instance allocated on the default CppCPU device.
+    '''
     ret = Tensor(np_array.shape)
     ret.copy_from_numpy(np_array)
     return ret
 
 
 def to_numpy(t):
-    ''' this method gets the values of tensor data and
-        returns it as numpy array
-        TODO(wangwei) clone t to host
+    '''Convert the tensor into a numpy array.
+
+    Since numpy array is allocated on CPU devices, the input Tensor instance
+    must be on the default CppCPU device.
+
+    Args:
+        t (Tensor), a Tensor on the default CppCPU device.
+
+    Returns:
+        a numpy array
     '''
+    assert t.device == device.get_default_device() or t.device is None, \
+        'Please move the tensor onto the default host device'
+
     if t.dtype == core_pb2.kFloat32:
         np_array = t.singa_tensor.floatGetValue(int(t.size()))
     elif t.dtype == core_pb2.kInt:
@@ -305,34 +514,96 @@ def to_numpy(t):
 
 
 def abs(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = abs(x), x is an element of t
+    '''
     return _call_singa_func(singa.Abs, t.singa_tensor)
 
 
 def exp(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = exp(x), x is an element of t
+    '''
     return _call_singa_func(singa.Exp, t.singa_tensor)
 
 
 def log(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = log(x), x is an element of t
+    '''
     return _call_singa_func(singa.Log, t.singa_tensor)
 
 
 def relu(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x if x >0; otherwise 0; x is an element
+        of t
+    '''
     return _call_singa_func(singa.ReLU, t.singa_tensor)
 
 
 def sigmoid(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = sigmoid(x); x is an element of t
+    '''
     return _call_singa_func(singa.Sigmoid, t.singa_tensor)
 
 
 def square(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x * x, x is an element of t
+    '''
     return _call_singa_func(singa.Square, t.singa_tensor)
 
 
 def tanh(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = tanh(x), x is an element of t
+    '''
     return _call_singa_func(singa.Tanh, t.singa_tensor)
 
 
 def sum(t, axis=None):
+    '''Sum elements of the input tensor long the given axis.
+
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, the summation is done over all elements;
+            if axis is provided, then it is calculated along the given axis,
+            e.g. 0 -- sum each column; 1 -- sum each row.
+
+    Returns:
+        a float value as the sum of all elements, or a new Tensor
+    '''
+
     if axis is None:
         return singa.floatSum(t.singa_tensor)
     else:
@@ -340,6 +611,17 @@ def sum(t, axis=None):
 
 
 def pow(t, x, out=None):
+    '''
+    Args:
+        t (Tensor): input tensor
+        x (float or Tensor): y[i] = t[i]^x if x is a float value; otherwise,
+            y[i]= t[i]^x[i] if x is a tensor.
+        out (None or Tensor): if None, a new Tensor would be constructed to
+            store the result; otherwise, the result is put into out.
+
+    Returns:
+        the result tensor.
+    '''
     if out is None:
         if isinstance(x, Tensor):
             return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor)
@@ -353,7 +635,17 @@ def pow(t, x, out=None):
         return out
 
 
-def average(t, axis=0):
+def average(t, axis=None):
+    '''
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, average all elements; otherwise average
+            along the given dimension. 0 for averaging each column; 1 for
+            averaging each row.
+
+    Returns:
+        a float value if axis is None; otherwise, a new Tensor for the result.
+    '''
     if t.ndim() > 1:
         return _call_singa_func(singa.Average, t.singa_tensor, axis)
     else:
@@ -361,6 +653,15 @@ def average(t, axis=0):
 
 
 def softmax(t, out=None):
+    '''Apply SoftMax for each row of the Tensor.
+
+    Args:
+        t (Tensor): the input 1d or 2d tensor
+        out (Tensor, optional): if not None, it is used to store the result
+
+    Returns:
+        the result Tensor
+    '''
     if out is None:
         return _call_singa_func(singa.SoftMax, t.singa_tensor)
     else:
@@ -369,22 +670,73 @@ def softmax(t, out=None):
 
 
 def lt(t, x):
+    '''Elementi-wise comparison for t < x
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] < x ? 1.0f:0.0f,
+        or t[i] < x[i] ? 1.0f:0.0f
+    '''
     return t < x
 
 
 def le(t, x):
+    '''Elementi-wise comparison for t <= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] <= x ? 1.0f:0.0f,
+        or t[i] <= x[i] ? 1.0f:0.0f
+    '''
     return t <= x
 
 
 def gt(t, x):
+    '''Elementi-wise comparison for t > x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] > x ? 1.0f:0.0f,
+        or t[i] > x[i] ? 1.0f:0.0f
+    '''
     return t > x
 
 
 def ge(t, x):
+    '''Elementi-wise comparison for t >= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] >= x ? 1.0f:0.0f,
+        or t[i] >= x[i] ? 1.0f:0.0f
+    '''
     return t >= x
 
 
 def add(lhs, rhs, ret=None):
+    '''Elementi-wise addition.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__add__()
         return lhs + rhs
@@ -397,6 +749,17 @@ def add(lhs, rhs, ret=None):
 
 
 def sub(lhs, rhs, ret=None):
+    '''Elementi-wise subtraction.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__sub__()
         return lhs - rhs
@@ -409,6 +772,18 @@ def sub(lhs, rhs, ret=None):
 
 
 def eltwise_mult(lhs, rhs, ret=None):
+    '''Elementi-wise multiplication.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
+
     if ret is None:
         # call Tensor.__mul__()
         return lhs * rhs
@@ -423,8 +798,21 @@ def eltwise_mult(lhs, rhs, ret=None):
 
 
 def mult(A, B, C=None, alpha=1.0, beta=0.0):
-    '''
+    '''Do matrix-matrix or matrix-vector multiplication.
+
     This function returns C = alpha * A * B + beta * C
+
+    Args:
+        A (Tensor): 2d Tensor
+        B (Tensor): If B is a 1d Tensor, GEMV would be invoked for matrix-vector
+            multiplication; otherwise GEMM would be invoked.
+        C (Tensor, optional): for storing the result; If None, a new Tensor
+            would be created.
+        alpha (float)
+        beta (float)
+
+    Returns:
+        the result Tensor
     '''
     if C is None:
         return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor)
@@ -435,6 +823,17 @@ def mult(A, B, C=None, alpha=1.0, beta=0.0):
 
 
 def div(lhs, rhs, ret=None):
+    '''Elementi-wise division.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__div__()
         return lhs / rhs
@@ -447,51 +846,125 @@ def div(lhs, rhs, ret=None):
 
 
 def axpy(alpha, x, y):
-    if isinstance(alpha, float):
-        singa.floatAxpy(alpha, x.singa_tensor, y.singa_tensor)
+    '''Element-wise operation for y += alpha * x.
+
+    Args:
+        alpha (float)
+        x (Tensor)
+        y (Tensor)
+
+    Returns:
+        y
+    '''
+    singa.floatAxpy(float(alpha), x.singa_tensor, y.singa_tensor)
     return y
 
 
 def bernoulli(p, t):
-    if isinstance(p, float):
-        singa.floatBernoulli(p, t.singa_tensor)
+    '''Generate a binary value for each element of t.
+
+    Args:
+        p (float): each element is 1 with probability p; and 0 with 1 - p
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatBernoulli(float(p), t.singa_tensor)
     return t
 
 
 def gaussian(mean, std, t):
-    if isinstance(mean, float):
-        singa.floatGaussian(mean, std, t.singa_tensor)
+    '''Generate values following a Gaussian distribution.
+
+    Args:
+        mean (float): the mean of the Gaussian distribution.
+        std (float): the standard variance of the Gaussian distribution.
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatGaussian(float(mean), float(std), t.singa_tensor)
     return t
 
 
 def uniform(low, high, t):
-    if isinstance(low, float):
-        singa.floatUniform(low, high, t.singa_tensor)
+    '''Generate values following a Uniform distribution.
+
+    Args:
+        low (float): the lower bound
+        hight (float): the higher bound
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatUniform(float(low), float(high), t.singa_tensor)
     return t
 
 
 def add_column(alpha, v, beta, M):
-    singa.floatAddColumn(alpha, beta, v.singa_tensor, M.singa_tensor)
+    '''Add v to each column of M.
+
+    Denote each column of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
+    singa.floatAddColumn(float(alpha), float(beta), v.singa_tensor,
+                         M.singa_tensor)
     return M
 
 
 def add_row(alpha, v, beta, M):
+    '''Add v to each row of M.
+
+    Denote each row of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
     singa.floatAddRow(alpha, beta, v.singa_tensor, M.singa_tensor)
     return M
 
 
 def sum_columns(M):
+    '''Sum all columns into a single column.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulted column.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_col = M.shape(0)
-    ret = Tensor((nb_col, 1))
+    ret = Tensor((M.shape[0], 1))
     singa.SumColumns(M.singa_tensor, ret.singa_tensor)
     return ret
 
 
 def sum_rows(M):
+    '''Sum all rows into a single row.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulted row.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_row = M.shape(1)
-    ret = Tensor((1, nb_row))
+    ret = Tensor((1, M.shape[1]))
     singa.SumRows(M.singa_tensor, ret.singa_tensor)
     return ret
 
@@ -500,15 +973,6 @@ def sum_rows(M):
 '''
 
 
-def _tuple_to_vector(tshape):
-    ''' this function converts tuple to std::vector<int>
-    '''
-    vs = singa.Shape(len(tshape))
-    for i in range(len(tshape)):
-        vs[i] = tshape[i]
-    return vs
-
-
 def _call_singa_func(_singa_func, *args):
     ''' this function calls singa global functions that returns Tensor
         and create new python Tensor instance
@@ -516,7 +980,7 @@ def _call_singa_func(_singa_func, *args):
     '''
     new_t = Tensor()
     new_t.singa_tensor = _singa_func(*args)
-    new_t.shape = new_t.singa_tensor.shape()
+    new_t.shape = tuple(new_t.singa_tensor.shape())
     new_t.device = new_t.singa_tensor.device()
     new_t.dtype = new_t.singa_tensor.data_type()
     return new_t

[33/51] [abbrv] incubator-singa git commit: SINGA-223 Use Sphinx to create the website.

Posted by wa...@apache.org.

SINGA-223 Use Sphinx to create the website.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/31ae6bd4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/31ae6bd4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/31ae6bd4

Branch: refs/heads/master
Commit: 31ae6bd46bed53c2bccebaf691bfe18b6addb5e1
Parents: e963363
Author: Moaz Reyad <mo...@gmail.com>
Authored: Fri Aug 12 15:13:37 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:02:50 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                          | 199 +----------------
 doc/_static/style.css                 |   3 +
 doc/_templates/layout.html            |  58 -----
 doc/build.sh                          |  33 ---
 doc/community/issue-tracking.md       |   9 -
 doc/community/mail-lists.rst          |  10 -
 doc/community/source-repository.md    |  22 --
 doc/community/team-list.rst           |  64 ------
 doc/conf.py                           | 339 -----------------------------
 doc/develop/contribute-code.md        |  60 -----
 doc/develop/contribute-docs.md        |  28 ---
 doc/develop/how-contribute.md         |  11 -
 doc/develop/schedule.rst              |  40 ----
 doc/docs.rst                          |   6 -
 doc/docs/cnn.md                       | 141 ------------
 doc/docs/device.rst                   |  38 ----
 doc/docs/examples.rst                 |   6 -
 doc/docs/index.rst                    |  10 -
 doc/docs/installation.md              |  69 ------
 doc/docs/neural-net.md                | 327 ----------------------------
 doc/docs/overview.rst                 |  99 ---------
 doc/docs/software_stack.md            |  99 ---------
 doc/docs/tensor.rst                   |  54 -----
 doc/docs/zh/index.md                  |   9 -
 doc/downloads.md                      |  67 ------
 doc/en/_templates/layout.html         |  61 ++++++
 doc/en/community/issue-tracking.md    |   9 +
 doc/en/community/mail-lists.rst       |  10 +
 doc/en/community/source-repository.md |  22 ++
 doc/en/community/team-list.rst        |  64 ++++++
 doc/en/conf.py                        | 339 +++++++++++++++++++++++++++++
 doc/en/develop/contribute-code.md     |  60 +++++
 doc/en/develop/contribute-docs.md     |  28 +++
 doc/en/develop/how-contribute.md      |  11 +
 doc/en/develop/schedule.rst           |  40 ++++
 doc/en/docs.rst                       |   6 +
 doc/en/docs/cnn.md                    | 141 ++++++++++++
 doc/en/docs/device.rst                |  38 ++++
 doc/en/docs/index.rst                 |  10 +
 doc/en/docs/installation.md           |  69 ++++++
 doc/en/docs/neural-net.md             | 327 ++++++++++++++++++++++++++++
 doc/en/docs/overview.rst              |  99 +++++++++
 doc/en/docs/software_stack.md         |  99 +++++++++
 doc/en/docs/tensor.rst                |  54 +++++
 doc/en/downloads.md                   |  67 ++++++
 doc/en/index.rst                      | 109 ++++++++++
 doc/index.rst                         | 109 ----------
 doc/make.bat                          | 281 ------------------------
 doc/zh/_templates/layout.html         |  61 ++++++
 doc/zh/conf.py                        | 339 +++++++++++++++++++++++++++++
 doc/zh/index.md                       |   9 +
 examples/index.rst                    |   3 +
 52 files changed, 2082 insertions(+), 2184 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index c6eddf1..436a661 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -18,209 +18,18 @@ I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html       to make standalone HTML files"
-	@echo "  dirhtml    to make HTML files named index.html in directories"
-	@echo "  singlehtml to make a single large HTML file"
-	@echo "  pickle     to make pickle files"
-	@echo "  json       to make JSON files"
-	@echo "  htmlhelp   to make HTML files and a HTML help project"
-	@echo "  qthelp     to make HTML files and a qthelp project"
-	@echo "  applehelp  to make an Apple Help Book"
-	@echo "  devhelp    to make HTML files and a Devhelp project"
-	@echo "  epub       to make an epub"
-	@echo "  epub3      to make an epub3"
-	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
-	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
-	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
-	@echo "  text       to make text files"
-	@echo "  man        to make manual pages"
-	@echo "  texinfo    to make Texinfo files"
-	@echo "  info       to make Texinfo files and run them through makeinfo"
-	@echo "  gettext    to make PO message catalogs"
-	@echo "  changes    to make an overview of all changed/added/deprecated items"
-	@echo "  xml        to make Docutils-native XML files"
-	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
-	@echo "  linkcheck  to check all external links for integrity"
-	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
-	@echo "  coverage   to run coverage check of the documentation (if enabled)"
-	@echo "  dummy      to check syntax errors of document sources"
 
 .PHONY: clean
 clean:
 	rm -rf $(BUILDDIR)/*
+	rm -rf en/docs/examples
 
 .PHONY: html
 html:
-	cp -rf ../examples docs/
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	cp -rf ../examples en/docs/
+	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html
+	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 
-.PHONY: dirhtml
-dirhtml:
-	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-.PHONY: singlehtml
-singlehtml:
-	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
-	@echo
-	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-.PHONY: pickle
-pickle:
-	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
-	@echo
-	@echo "Build finished; now you can process the pickle files."
-
-.PHONY: json
-json:
-	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
-	@echo
-	@echo "Build finished; now you can process the JSON files."
-
-.PHONY: htmlhelp
-htmlhelp:
-	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
-	@echo
-	@echo "Build finished; now you can run HTML Help Workshop with the" \
-	      ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-.PHONY: qthelp
-qthelp:
-	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
-	@echo
-	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
-	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Singa.qhcp"
-	@echo "To view the help file:"
-	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Singa.qhc"
-
-.PHONY: applehelp
-applehelp:
-	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
-	@echo
-	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
-	@echo "N.B. You won't be able to view it unless you put it in" \
-	      "~/Library/Documentation/Help or install it in your application" \
-	      "bundle."
-
-.PHONY: devhelp
-devhelp:
-	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
-	@echo
-	@echo "Build finished."
-	@echo "To view the help file:"
-	@echo "# mkdir -p $$HOME/.local/share/devhelp/Singa"
-	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Singa"
-	@echo "# devhelp"
-
-.PHONY: epub
-epub:
-	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
-	@echo
-	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-.PHONY: epub3
-epub3:
-	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
-	@echo
-	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
-
-.PHONY: latex
-latex:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo
-	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
-	@echo "Run \`make' in that directory to run these through (pdf)latex" \
-	      "(use \`make latexpdf' here to do that automatically)."
-
-.PHONY: latexpdf
-latexpdf:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through pdflatex..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 
-.PHONY: latexpdfja
-latexpdfja:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through platex and dvipdfmx..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: text
-text:
-	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
-	@echo
-	@echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-.PHONY: man
-man:
-	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
-	@echo
-	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-.PHONY: texinfo
-texinfo:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo
-	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
-	@echo "Run \`make' in that directory to run these through makeinfo" \
-	      "(use \`make info' here to do that automatically)."
-
-.PHONY: info
-info:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo "Running Texinfo files through makeinfo..."
-	make -C $(BUILDDIR)/texinfo info
-	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-.PHONY: gettext
-gettext:
-	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
-	@echo
-	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-.PHONY: changes
-changes:
-	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
-	@echo
-	@echo "The overview file is in $(BUILDDIR)/changes."
-
-.PHONY: linkcheck
-linkcheck:
-	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
-	@echo
-	@echo "Link check complete; look for any errors in the above output " \
-	      "or in $(BUILDDIR)/linkcheck/output.txt."
-
-.PHONY: doctest
-doctest:
-	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
-	@echo "Testing of doctests in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/doctest/output.txt."
-
-.PHONY: coverage
-coverage:
-	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
-	@echo "Testing of coverage in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/coverage/python.txt."
-
-.PHONY: xml
-xml:
-	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
-	@echo
-	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-.PHONY: pseudoxml
-pseudoxml:
-	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
-	@echo
-	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
-
-.PHONY: dummy
-dummy:
-	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
-	@echo
-	@echo "Build finished. Dummy builder generates no files."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/_static/style.css
----------------------------------------------------------------------
diff --git a/doc/_static/style.css b/doc/_static/style.css
new file mode 100644
index 0000000..b07bdb1
--- /dev/null
+++ b/doc/_static/style.css
@@ -0,0 +1,3 @@
+.wy-nav-content {
+    max-width: none;
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
deleted file mode 100755
index 800a74d..0000000
--- a/doc/_templates/layout.html
+++ /dev/null
@@ -1,58 +0,0 @@
-{#
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements.  See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership.  The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-#}
-{% extends "!layout.html" %}
-     
-{% block footer %}
-
-
-<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
-<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
- 
-  <span class="rst-current-version" data-toggle="rst-current-version">
-    <span class="fa fa-book"> incubator-singa </span>
-    v: {{ version }}
-    <span class="fa fa-caret-down"></span>
-  </span>
-  <div class="rst-other-versions">
-    <dl>
-      <dt>Languages</dt>
-      <dd><a href="/en/latest/">English</a></dd>
-      <dd><a href="/zh/latest/">\u4e2d\u6587</a></dd>	  
-	  <dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
-	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
-	  <dd><a href="/it/latest/">Italiano</a></dd>
-	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd>
-    </dl>
-    <dl>
-      <dt>Versions</dt>
-      <dd><a href="/{{ language }}/latest/">latest</a></dd>
-      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
-      <dd><a href="/{{ language }}/0.2.0/">v0.2.0</a></dd>
-	  <dd><a href="/{{ language }}/0.1.0/">v0.1.0</a></dd>
-    </dl>
-  </div>
-</div>
-
- <a href="https://github.com/apache/incubator-singa">
-    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
-        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
-        alt="Fork me on GitHub">
-</a>
-
-{{ super() }}
-{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/build.sh
----------------------------------------------------------------------
diff --git a/doc/build.sh b/doc/build.sh
deleted file mode 100755
index 3af6ec1..0000000
--- a/doc/build.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-
-#
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-echo "Building English version"
-make -e SPHINXOPTS="-D language='en'" html
-
-#echo "Building Chinese version"
-#make -e SPHINXOPTS="-D language='zh'" html
-
-#echo "Building Japanese version"
-#make -e SPHINXOPTS="-D language='jp'" html
-
-#echo "Building Italian version"
-#make -e SPHINXOPTS="-D language='it'" html 
-
-#echo "Building Arabic version"
-#make -e SPHINXOPTS="-D language='ar'" html

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/issue-tracking.md
----------------------------------------------------------------------
diff --git a/doc/community/issue-tracking.md b/doc/community/issue-tracking.md
deleted file mode 100644
index 26b23dd..0000000
--- a/doc/community/issue-tracking.md
+++ /dev/null
@@ -1,9 +0,0 @@
-## Issue Tracking
-
-___
-
-SINGA uses [JIRA](https://www.atlassian.com/software/jira) a J2EE-based, issue tracking and project management application.
-
-Issues, bugs, and feature requests should be submitted to the following issue tracking system for this project.
-
-* https://issues.apache.org/jira/browse/singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/mail-lists.rst
----------------------------------------------------------------------
diff --git a/doc/community/mail-lists.rst b/doc/community/mail-lists.rst
deleted file mode 100644
index b1ba6f9..0000000
--- a/doc/community/mail-lists.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Project Mailing Lists
-=====================
-
-These are the mailing lists that have been established for this project. For each list, there is a subscribe, unsubscribe, and an archive link.
-
-.. csv-table:: Mailing Lists
-	:header: "Name", "Post", "Subscribe", "Unsubscribe", "Archive"
-
-        "Development", "dev@singa.incubator.apache.org", "[Subscribe](mailto:dev-subscribe@singa.incubator.apache.org)", "[Unsubscribe](mailto:dev-unsubscribe@singa.incubator.apache.org)", "[mail-archives.apache.org](http://mail-archives.apache.org/mod_mbox/singa-dev/)"
-        "Commits", "commits@singa.incubator.apache.org", "[Subscribe](mailto:commits-subscribe@singa.incubator.apache.org)", "[Unsubscribe](mailto:commits-unsubscribe@singa.incubator.apache.org)", "[mail-archives.apache.org](http://mail-archives.apache.org/mod_mbox/singa-commits/)"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/source-repository.md
----------------------------------------------------------------------
diff --git a/doc/community/source-repository.md b/doc/community/source-repository.md
deleted file mode 100644
index 8864629..0000000
--- a/doc/community/source-repository.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Source Repository
-
-___
-
-This project uses [Git](http://git-scm.com/) to manage its source code. Instructions on Git use can be found at [http://git-scm.com/documentation](http://git-scm.com/documentation).
-
-## Web Access
-
-The following is a link to the online source repository.
-
-* [https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary](https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary)
-
-
-## Upstream for committers
-
-Committers need to set the upstream endpoint to the Apache git (not github) repo address, e.g.,
-
-    $ git remote add asf https://git-wip-us.apache.org/repos/asf/incubator-singa.git
-
-Then you (committer) can push your code in this way,
-
-    $ git push asf <local-branch>:<remote-branch>

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/team-list.rst
----------------------------------------------------------------------
diff --git a/doc/community/team-list.rst b/doc/community/team-list.rst
deleted file mode 100644
index a677aff..0000000
--- a/doc/community/team-list.rst
+++ /dev/null
@@ -1,64 +0,0 @@
-The SINGA Team
-==============
-
-A successful project requires many people to play many roles. Some members write code or documentation, while others are valuable as testers, submitting patches and suggestions.
-
-Mentors
--------
-
-==================   ============
-Name                 Email
-==================   ============
-Daniel Dai           daijy@apache.org
-Ted Dunning	     tdunning@apache.org
-Alan Gates	     gates@apache.org
-Thejas Nair	     thejas@apache.org
-==================   ============
-
-Developers
-----------
-
-+-------------------+--------------------------------+----------------------------------------------+
-| Name              |  Email                         |  Organization                                |
-+-------------------+--------------------------------+----------------------------------------------+
-|Gang Chen          |  cg@zju.edu.cn                 |   Zhejiang University                        |
-+-------------------+--------------------------------+----------------------------------------------+
-| Haibo Chen        | hzchenhaibo@corp.netease.com   |  NetEase                                     |
-+-------------------+--------------------------------+----------------------------------------------+
-| Anh Dinh	    |     dinhtta@apache.org	     |         National University of Singapore     |                       
-+-------------------+--------------------------------+----------------------------------------------+
-| Jinyang Gao	    |     jinyang@apache.org	     |         National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Xing Ji	    |         jixin@comp.nus.edu.sg  |          National University of Singapore    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Chonho Lee	    |  chonho@gmail.com              |   National University of Singapore           |
-+-------------------+--------------------------------+----------------------------------------------+
-| Zhaojing Luo	    | zhaojing@apache.org	     | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Beng Chin Ooi	    | ooibc@comp.nus.edu.sg          | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Kian-Lee Tan	    |    tankl@apache.org            | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-|Anthony K. H. Tung |  atung@comp.nus.edu.sg         |   National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Ji Wang	    |         wangji@comp.nus.edu.sg |	      National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Sheng Wang	    |    wangsh@apache.org           | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Wei Wang	    |    wangwei@apache.org	     |         National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Yuan Wang         |  wangyuan@corp.netease.com     |   NetEase                                    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Wenfeng Wu	    |     wuwf@comp.nus.edu.sg       |  National University of Singapore            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Zhongle Xie	    |     zhongle@apache.org	     |        National University of Singapore      |
-+-------------------+--------------------------------+----------------------------------------------+
-| Meihui Zhang	    |     meihui_zhang@sutd.edu.sg   |Singapore University of Technology and Design |
-+-------------------+--------------------------------+----------------------------------------------+
-| Kaiping Zheng     |     kaiping@apache.org	     |         National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Ming Zhong        | hzzhongming15@corp.netease.com |   Zhejiang University                        |
-+-------------------+--------------------------------+----------------------------------------------+
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
deleted file mode 100755
index 9d4480e..0000000
--- a/doc/conf.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# incubator-singa documentation build configuration file, created by
-# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../build/python'))
-
-# -- General configuration ------------------------------------------------
-from recommonmark.parser import CommonMarkParser
-
-source_parsers = {
-    '.md': CommonMarkParser,
-}
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
-napoleon_google_docstring = True
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = ['.rst', '.md']
-
-# The encoding of source files.
-#
-source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'incubator-singa'
-copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
-author = u'moaz'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = u'1.0.0'
-# The full version, including alpha/beta/rc tags.
-release = u'1.0.0'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#
-# today = ''
-#
-# Else, today_fmt is used as the format for a strftime call.
-#
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-# keep_warnings = False
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents.
-# "<project> v<release> documentation" by default.
-#
-# html_title = u'Singa v1.0.0'
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#
-html_logo = 'image/singa.png'
-
-# The name of an image file (relative to this directory) to use as a favicon of
-# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#
-# html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#
-# html_extra_path = []
-
-# If not None, a 'Last updated on:' timestamp is inserted at every page
-# bottom, using the given strftime format.
-# The empty string is equivalent to '%b %d, %Y'.
-#
-# html_last_updated_fmt = None
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-#
-# html_domain_indices = True
-
-# If false, no index is generated.
-#
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#
-# html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#
-# html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
-#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
-#
-# html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# 'ja' uses this config value.
-# 'zh' user can custom change `jieba` dictionary path.
-#
-# html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#
-# html_search_scorer = 'scorer.js'
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Singadoc'
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-     # The paper size ('letterpaper' or 'a4paper').
-     #
-     # 'papersize': 'letterpaper',
-
-     # The font size ('10pt', '11pt' or '12pt').
-     #
-     # 'pointsize': '10pt',
-
-     # Additional stuff for the LaTeX preamble.
-     #
-     # 'preamble': '',
-
-     # Latex figure (float) alignment
-     #
-     # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
-     u'moaz', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-#
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#
-# latex_appendices = []
-
-# If false, no module index is generated.
-#
-# latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     [author], 1)
-]
-
-# If true, show URL addresses after external links.
-#
-# man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     author, 'incubator-singa', 'One line description of project.',
-     'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-#
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#
-# texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#
-# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/contribute-code.md
----------------------------------------------------------------------
diff --git a/doc/develop/contribute-code.md b/doc/develop/contribute-code.md
deleted file mode 100644
index 98e5aee..0000000
--- a/doc/develop/contribute-code.md
+++ /dev/null
@@ -1,60 +0,0 @@
-## How to Contribute Code
-
-_____
-
-### Coding Style
-
-The SINGA codebase follows the [Google C++ Style Guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml).
-
-To check if your code follows the style, you can use the provided cpplint tool:
-    
-    $ ./tool/cpplint.py YOUR_FILE
-
-
-### JIRA format
-
-Like other Apache projects, SINGA uses JIRA to track bugs, improvements and
-other high-level discussions (e.g., system design and features).  Github pull requests are
-used for implementation discussions, e.g., code review and code merge.
-
-* Provide a descriptive Title.
-* Write a detailed Description. For bug reports, this should ideally include a
-  short reproduction of the problem. For new features, it may include a design
-  document.
-* Set [required fields](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-JIRA)
-
-### Pull Request
-
-The work flow is
-
-* Fork the [SINGA Github repository](https://github.com/apache/incubator-singa) to
-your own Github account.
-
-* Clone your fork, create a new branch (e.g., feature-foo or fixbug-foo),
- work on it. After finishing your job,
- [rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing) it to the
- current latest master and push commits to your own Github account (the new
- branch).
-
-* Open a pull request against the master branch of apache/incubator-singa.
-The PR title should be of the form SINGA-xxxx Title, where
-SINGA-xxxx is the relevant JIRA number, and Title may be the JIRA's title or a
-more specific title describing the PR itself, for example, "SINGA-6 Implement thread-safe singleton". Detailed description can be copied from the JIRA.
-Consider identifying committers or other contributors who have worked on the
-code being changed. Find the file(s) in Github and click "Blame" to see a
-line-by-line annotation of who changed the code last.  You can add @username in
-the PR description to ping them immediately.
-Please state that the contribution is your original work and that you license
-the work to the project under the project's open source license. Further commits (e.g., bug fix)
-to your new branch will be added to this pull request automatically by Github.
-
-* Wait for one committer to review the patch. If no conflicts, the committers will merge it with
-the master branch. The merge should a) not use rebase b) disable fast forward merge c) check the 
-commit message format and test the code/feature.
-
-* If there are too many small commit messages, you will be told to squash your commits into fewer meaningful
-commits. If your commit message does not follow the format (i.e., SINGA-xxxx), you will be told to
-reword your commit message. Both changes can be done using interactive git rebase. Once you
-get the commits corrected, push them to you own github again. Your pull request 
-will be automatically updated. For details, please refer to 
-[Rebase Pull Requests](https://github.com/edx/edx-platform/wiki/How-to-Rebase-a-Pull-Request).
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/contribute-docs.md
----------------------------------------------------------------------
diff --git a/doc/develop/contribute-docs.md b/doc/develop/contribute-docs.md
deleted file mode 100644
index 5e21a0f..0000000
--- a/doc/develop/contribute-docs.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# How to Contribute Documentation
-
-___
-
-
-## Website
-This document gives step-by-step instructions for deploying [Singa website](http://singa.incubator.apache.org).
-
-Singa website is built by [Sphinx](http://www.sphinx-doc.org) 1.4.4 from a source tree stored in git: https://github.com/apache/incubator-singa/tree/master/doc.
-
-To install Sphinx on Ubuntu:
-
-    $ apt-get install python-sphinx
-
-To install the markdown support for Sphinx:
-
-    $ pip install recommonmark
-
-You can build the website by executing the following command from the doc folder:
-
-    $ make html
-
-The procedure for contributing documentation is the same as [contributing code](contribute-code.html).
-
-
-## CPP API
-
-To generate docs, run "doxygen" from the doc folder (Doxygen >= 1.8 recommended)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/how-contribute.md
----------------------------------------------------------------------
diff --git a/doc/develop/how-contribute.md b/doc/develop/how-contribute.md
deleted file mode 100644
index 8687b5a..0000000
--- a/doc/develop/how-contribute.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# How to Contribute to SINGA
-
-___
-
-As with any open source project, there are several ways you can help:
-
-* Join the [mailing list](../community/mail-lists.html) and answer other user's questions.
-* [Build Singa](../quick-start.html) for yourself, in order to fix bugs.
-* Report bugs, feature requests and other issues in the [issue tracking](../community/issue-tracking.html) application.
-* Check SINGA's [development schedule](schedule.html) and [contribute code](contribute-code.html) by providing patches.
-* [Help with the documentation](contribute-docs.html) by updating webpages that are lacking or unclear.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/schedule.rst
----------------------------------------------------------------------
diff --git a/doc/develop/schedule.rst b/doc/develop/schedule.rst
deleted file mode 100644
index 2afe54f..0000000
--- a/doc/develop/schedule.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-Development Schedule
-====================
-
-.. csv-table::
-	:header: "Release", "Module", "Feature", "Status"
-
-	" 0.1 Sep 2015     "," Neural Network          "," Feed forward neural network, including CNN, MLP                                                                 "," done  "
-	"                  ","                         "," RBM-like model, including RBM                                                                                   "," done   "
-	"                  ","                         "," Recurrent neural network, including standard RNN                                                                "," done   "
-	"                  ","  Architecture           "," One worker group on single node (with data partition)                                                           "," done   "
-	"                  ","                         "," Multi worker groups on single node using [Hogwild](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf)      ","done"
-	"                  ","                         "," Distributed Hogwild","done"
-	"                  ","                         "," Multi groups across nodes, like [Downpour](http://papers.nips.cc/paper/4687-large-scale-distributed-deep-networks) ","done"
-	"                  ","                         "," All-Reduce training architecture like [DeepImage](http://arxiv.org/abs/1501.02876) ","done"
-	"                  ","                         "," Load-balance among servers "," done"
-	"                  ","  Failure recovery       "," Checkpoint and restore ","done"
-	"                  ","  Tools                  "," Installation with GNU auto tools"," done"
-	"0.2 Jan 2016      "," Neural Network          "," Feed forward neural network, including AlexNet, cuDNN layers, etc."," done "
-	"                  ","                         "," Recurrent neural network, including GRULayer and BPTT","done "
-	"                  ","                         "," Model partition and hybrid partition","done"
-	"      		   "," Tools                   "," Integration with Mesos for resource management","done"
-	"         	   ","                         "," Prepare Docker images for deployment","done"
-	"              	   ","                         "," Visualization of neural net and debug information ","done"
-	"                  "," Binding                 "," Python binding for major components ","done"
-	"                  "," GPU                     "," Single node with multiple GPUs ","done"
-	"0.3 April 2016    "," GPU                     "," Multiple nodes, each with multiple GPUs","done"
-	"                  ","                         "," Heterogeneous training using both GPU and CPU [CcT](http://arxiv.org/abs/1504.04343)","done"
-	"                  ","                         "," Support cuDNN v4 "," done"
-	"                  "," Installation            "," Remove dependency on ZeroMQ, CZMQ, Zookeeper for single node training","done"
-	"                  "," Updater                 "," Add new SGD updaters including Adam, AdamMax and AdaDelta","done"
-	"                  "," Binding                 "," Enhance Python binding for training","done"
-	"1.0 July 2016     "," Programming abstraction ","Tensor with linear algebra, neural net and random operations "," "
-	"                  ","                         ","Updater for distributed parameter updating ",""
-	"                  "," Optimization            "," Execution and memory optimization",""
-	"                  "," Hardware                "," Use Cuda and Cudnn for Nvidia GPU",""
-	"                  ","                         "," Use OpenCL for AMD GPU or other devices",""
-	"                  "," Cross-platform          "," To extend from Linux to MacOS and Windows",""
-	"                  "," Examples                "," Speech recognition example",""
-	"                  ","                         ","Large image models, e.g., [GoogLeNet](http://arxiv.org/abs/1409.4842), [VGG](https://arxiv.org/pdf/1409.1556.pdf) and [Residual Net](http://arxiv.org/abs/1512.03385)",""
-	"     "," Rafiki                  "," Deep learning as a service "," "

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
deleted file mode 100644
index 400b12a..0000000
--- a/doc/docs.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Documentation
-=============
-
-.. toctree::
-   docs/index
-   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/cnn.md
----------------------------------------------------------------------
diff --git a/doc/docs/cnn.md b/doc/docs/cnn.md
deleted file mode 100755
index 21ef1f7..0000000
--- a/doc/docs/cnn.md
+++ /dev/null
@@ -1,141 +0,0 @@
-#Quickstart - Cifar10 example
-Convolution neural network (CNN) is a type of feed-forward artificial neural network widely used for image classification. In this example, we will use a deep CNN model to do image classification for the [CIFAR10 dataset](http://www.cs.toronto.edu/~kriz/cifar.html).
-
-## Running instructions for CPP version
-Please refer to [Installation](installation.html) page for how to install SINGA. Currently, we CNN requires CUDNN, hence both CUDA and CUDNN should be installed and SINGA should be compiled with CUDA and CUDNN.
-
-The Cifar10 dataset could be downloaded by running
-
-    # switch to cifar10 directory
-    $ cd ../examples/cifar10
-    # download data for CPP version
-    $ python download_data.py bin
-
-'bin' is for downloading binary version of Cifar10 data.
-
-During downloading, you should see the detailed output like
-
-     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
-     The tar file does exist. Extracting it now..
-     Finished!
-
-Now you have prepared the data for this Cifar10 example, the final step is to execute the `run.sh` script,
-
-    # in SINGA_ROOT/examples/cifar10/
-    $ ./run.sh
-
-You should see the detailed output as follows: first read the data files in order, show the statistics of training and testing data, then show the details of neural net structure with some parameter information, finally illustrate the performance details during training and validation process. The number of epochs can be specified in `run.sh` file.
-
-    Start training
-    Reading file cifar-10-batches-bin/data_batch_1.bin
-    Reading file cifar-10-batches-bin/data_batch_2.bin
-    Reading file cifar-10-batches-bin/data_batch_3.bin
-    Reading file cifar-10-batches-bin/data_batch_4.bin
-    Reading file cifar-10-batches-bin/data_batch_5.bin
-    Reading file cifar-10-batches-bin/test_batch.bin
-    Training samples = 50000, Test samples = 10000
-    conv1(32, 32, 32, )
-    pool1(32, 16, 16, )
-    relu1(32, 16, 16, )
-    lrn1(32, 16, 16, )
-    conv2(32, 16, 16, )
-    relu2(32, 16, 16, )
-    pool2(32, 8, 8, )
-    lrn2(32, 8, 8, )
-    conv3(64, 8, 8, )
-    relu3(64, 8, 8, )
-    pool3(64, 4, 4, )
-    flat(1024, )
-    ip(10, )
-    conv1_weight : 8.09309e-05
-    conv1_bias : 0
-    conv2_weight : 0.00797731
-    conv2_bias : 0
-    conv3_weight : 0.00795888
-    conv3_bias : 0
-    ip_weight : 0.00798683
-    ip_bias : 0
-    Messages will be appended to an existed file: train_perf
-    Messages will be appended to an existed file: val_perf
-    Epoch 0, training loss = 1.828369, accuracy = 0.329420, lr = 0.001000
-    Epoch 0, val loss = 1.561823, metric = 0.420600
-    Epoch 1, training loss = 1.465898, accuracy = 0.469940, lr = 0.001000
-    Epoch 1, val loss = 1.361778, metric = 0.513300
-    Epoch 2, training loss = 1.320708, accuracy = 0.529000, lr = 0.001000
-    Epoch 2, val loss = 1.242080, metric = 0.549100
-    Epoch 3, training loss = 1.213776, accuracy = 0.571620, lr = 0.001000
-    Epoch 3, val loss = 1.175346, metric = 0.582000
-
-The training details are stored in `train_perf` file in the same directory and the validation details in `val_perf` file.
-
-
-## Running instructions for Python version
-To run CNN example in Python version, we need to compile SINGA with Python binding,
-
-    $ mkdir build && cd build
-    $ cmake -DUSE_PYTHON=ON ..
-    $ make
-
-Now download the Cifar10 dataset,
-
-    # switch to cifar10 directory
-    $ cd ../examples/cifar10
-    # download data for Python version
-    $ python download_data.py py
-
-During downloading, you should see the detailed output like
-
-     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
-     The tar file does exist. Extracting it now..
-     Finished!
-
-Then execute the `train.py` script to build the model
-
-    $ python train.py
-
-You should see the output as follows including the details of neural net structure with some parameter information, reading data files, and the performance details during training and testing process.
-
-    (32L, 32L, 32L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 8L, 8L)
-    (32L, 8L, 8L)
-    (64L, 8L, 8L)
-    (64L, 8L, 8L)
-    (64L, 4L, 4L)
-    (1024L,)
-    Start intialization............
-    conv1_weight gaussian 7.938460476e-05
-    conv1_bias constant 0.0
-    conv2_weight gaussian 0.00793507322669
-    conv2_bias constant 0.0
-    conv3_weight gaussian 0.00799657031894
-    conv3_bias constant 0.0
-    dense_weight gaussian 0.00804364029318
-    dense_bias constant 0.0
-    Loading data ..................
-    Loading data file cifar-10-batches-py/data_batch_1
-    Loading data file cifar-10-batches-py/data_batch_2
-    Loading data file cifar-10-batches-py/data_batch_3
-    Loading data file cifar-10-batches-py/data_batch_4
-    Loading data file cifar-10-batches-py/data_batch_5
-    Loading data file cifar-10-batches-py/test_batch
-    Epoch 0
-    training loss = 1.881866, training accuracy = 0.306360 accuracy = 0.420000
-    test loss = 1.602577, test accuracy = 0.412200
-    Epoch 1
-    training loss = 1.536011, training accuracy = 0.441940 accuracy = 0.500000
-    test loss = 1.378170, test accuracy = 0.507600
-    Epoch 2
-    training loss = 1.333137, training accuracy = 0.519960 accuracy = 0.520000
-    test loss = 1.272205, test accuracy = 0.540600
-    Epoch 3
-    training loss = 1.185212, training accuracy = 0.574120 accuracy = 0.540000
-    test loss = 1.211573, test accuracy = 0.567600
-
-This script will call `alexnet.py` file to build the alexnet model. After the training is finished, SINGA will save the model parameters into a checkpoint file `model.bin` in the same directory. Then we can use this `model.bin` file for prediction.
-
-    $ python predict.py

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
deleted file mode 100644
index e79d87a..0000000
--- a/doc/docs/device.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-Device
-=======
-
-
-The Device abstract represents any hardware device with memory and compuation units.
-All [Tensor operations](tensor.html) are scheduled by the resident device for execution.
-Tensor memory is also managed by the device's memory manager. Therefore, optimization
-of memory and execution are implemented in the Device class.
-
-Specific devices
-----------------
-Currently, SINGA has three Device implmentations,
-
-1. CudaGPU for an Nvidia GPU card which runs Cuda code
-2. CppCPU for a CPU which runs Cpp code
-3. OpenclGPU for a GPU card which runs OpenCL code
-
-
-Python API
-----------
-
-.. automodule:: singa.device
-   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
-
-
-The following code provides examples of creating devices,
-
-.. code:: python
-
-   from singa import device
-   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
-   host = device.get_default_device()  # get the default host device (a CppCPU)
-   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
-   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
-
-
-CPP API
----------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
deleted file mode 100644
index b0b2af8..0000000
--- a/doc/docs/examples.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Examples
-========
-
-.. toctree::
-
-   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
deleted file mode 100644
index 2f6352e..0000000
--- a/doc/docs/index.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-English
-=======
-
-.. toctree::
-
-   installation
-   software_stack
-   device
-   tensor
-   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/docs/installation.md b/doc/docs/installation.md
deleted file mode 100755
index 8ab617f..0000000
--- a/doc/docs/installation.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# Building SINGA from source
-
-## Dependencies
-
-### Required
-* Google Protobuf (>=2.5)
-* BLAS (tested with OpenBLAS >=0.2.10)
-* CUDA (tested with 6.5, 7.0 and 7.5)
-* CUDNN (v4 and v5)
-* cmake (>=2.6)
-
-Users must install the above mandatory libraries.
-Currently CUDA and CUDNN are also mandatory, but it would become optional later.
-
-### Optional
-* Glog
-* OpenCV (tested with 2.4.8)
-* LMDB (tested with 0.9)
-
-
-## Instructions
-
-Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
-
-
-    $ git clone https://github.com/apache/incubator-singa.git
-    $ cd incubator-singa/
-    # switch to dev branch
-    $ git checkout dev
-
-
-If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
-which could be downloaded as
-
-    $ git submodule init
-    $ git submodule update
-
-
-### Linux OS
-
-GCC (>=4.8.1) is required to compile SINGA on Linux OS.
-In SINGA_ROOT, execute the following commands for compiling SINGA,
-
-    $ mkdir build && cd build
-    # generate Makefile for compilation
-    $ cmake ..
-    # compile SINGA
-    $ make
-
-Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
-
-    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
-    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
-
-You can use `ccmake ..` to configure the compilation options including using
-LMDB, GLOG, etc.
-
-After compiling SINGA, you can run the unit tests by
-
-    $ ./bin/test_singa
-
-You can see all the testing cases with testing results. If SINGA passes all
-tests, then you have successfully installed SINGA. Please proceed to try the examples!
-
-
-### MacOS
-
-
-### Windows

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/neural-net.md
----------------------------------------------------------------------
diff --git a/doc/docs/neural-net.md b/doc/docs/neural-net.md
deleted file mode 100644
index c10baf8..0000000
--- a/doc/docs/neural-net.md
+++ /dev/null
@@ -1,327 +0,0 @@
-# Neural Net
-
----
-
-`NeuralNet` in SINGA represents an instance of user's neural net model. As the
-neural net typically consists of a set of layers, `NeuralNet` comprises
-a set of unidirectionally connected [Layer](layer.html)s.
-This page describes how to convert an user's neural net into
-the configuration of `NeuralNet`.
-
-<img src="../_static/images/model-category.png" align="center" width="200px"/>
-<span><strong>Figure 1 - Categorization of popular deep learning models.</strong></span>
-
-## Net structure configuration
-
-Users configure the `NeuralNet` by listing all layers of the neural net and
-specifying each layer's source layer names. Popular deep learning models can be
-categorized as Figure 1. The subsequent sections give details for each
-category.
-
-### Feed-forward models
-
-<div align = "left">
-<img src="../_static/images/mlp-net.png" align="center" width="200px"/>
-<span><strong>Figure 2 - Net structure of a MLP model.</strong></span>
-</div>
-
-Feed-forward models, e.g., CNN and MLP, can easily get configured as their layer
-connections are undirected without circles. The
-configuration for the MLP model shown in Figure 1 is as follows,
-
-    net {
-      layer {
-        name : 'data"
-        type : kData
-      }
-      layer {
-        name : 'image"
-        type : kImage
-        srclayer: 'data'
-      }
-      layer {
-        name : 'label"
-        type : kLabel
-        srclayer: 'data'
-      }
-      layer {
-        name : 'hidden"
-        type : kHidden
-        srclayer: 'image'
-      }
-      layer {
-        name : 'softmax"
-        type : kSoftmaxLoss
-        srclayer: 'hidden'
-        srclayer: 'label'
-      }
-    }
-
-### Energy models
-
-<img src="../_static/images/rbm-rnn.png" align="center" width="500px"/>
-<span><strong>Figure 3 - Convert connections in RBM and RNN.</strong></span>
-
-
-For energy models including RBM, DBM,
-etc., their connections are undirected (i.e., Category B). To represent these models using
-`NeuralNet`, users can simply replace each connection with two directed
-connections, as shown in Figure 3a. In other words, for each pair of connected layers, their source
-layer field should include each other's name.
-The full [RBM example](rbm.html) has
-detailed neural net configuration for a RBM model, which looks like
-
-    net {
-      layer {
-        name : "vis"
-        type : kVisLayer
-        param {
-          name : "w1"
-        }
-        srclayer: "hid"
-      }
-      layer {
-        name : "hid"
-        type : kHidLayer
-        param {
-          name : "w2"
-          share_from: "w1"
-        }
-        srclayer: "vis"
-      }
-    }
-
-### RNN models
-
-For recurrent neural networks (RNN), users can remove the recurrent connections
-by unrolling the recurrent layer.  For example, in Figure 3b, the original
-layer is unrolled into a new layer with 4 internal layers. In this way, the
-model is like a normal feed-forward model, thus can be configured similarly.
-The [RNN example](rnn.html) has a full neural net
-configuration for a RNN model.
-
-
-## Configuration for multiple nets
-
-Typically, a training job includes three neural nets for
-training, validation and test phase respectively. The three neural nets share most
-layers except the data layer, loss layer or output layer, etc..  To avoid
-redundant configurations for the shared layers, users can uses the `exclude`
-filed to filter a layer in the neural net, e.g., the following layer will be
-filtered when creating the testing `NeuralNet`.
-
-
-    layer {
-      ...
-      exclude : kTest # filter this layer for creating test net
-    }
-
-
-
-## Neural net partitioning
-
-A neural net can be partitioned in different ways to distribute the training
-over multiple workers.
-
-### Batch and feature dimension
-
-<img src="../_static/images/partition_fc.png" align="center" width="400px"/>
-<span><strong>Figure 4 - Partitioning of a fully connected layer.</strong></span>
-
-
-Every layer's feature blob is considered a matrix whose rows are feature
-vectors. Thus, one layer can be split on two dimensions. Partitioning on
-dimension 0 (also called batch dimension) slices the feature matrix by rows.
-For instance, if the mini-batch size is 256 and the layer is partitioned into 2
-sub-layers, each sub-layer would have 128 feature vectors in its feature blob.
-Partitioning on this dimension has no effect on the parameters, as every
-[Param](param.html) object is replicated in the sub-layers. Partitioning on dimension
-1 (also called feature dimension) slices the feature matrix by columns. For
-example, suppose the original feature vector has 50 units, after partitioning
-into 2 sub-layers, each sub-layer would have 25 units. This partitioning may
-result in [Param](param.html) object being split, as shown in
-Figure 4. Both the bias vector and weight matrix are
-partitioned into two sub-layers.
-
-
-### Partitioning configuration
-
-There are 4 partitioning schemes, whose configurations are give below,
-
-  1. Partitioning each singe layer into sub-layers on batch dimension (see
-  below). It is enabled by configuring the partition dimension of the layer to
-  0, e.g.,
-
-          # with other fields omitted
-          layer {
-            partition_dim: 0
-          }
-
-  2. Partitioning each singe layer into sub-layers on feature dimension (see
-  below).  It is enabled by configuring the partition dimension of the layer to
-  1, e.g.,
-
-          # with other fields omitted
-          layer {
-            partition_dim: 1
-          }
-
-  3. Partitioning all layers into different subsets. It is enabled by
-  configuring the location ID of a layer, e.g.,
-
-          # with other fields omitted
-          layer {
-            location: 1
-          }
-          layer {
-            location: 0
-          }
-
-
-  4. Hybrid partitioning of strategy 1, 2 and 3. The hybrid partitioning is
-  useful for large models. An example application is to implement the
-  [idea proposed by Alex](http://arxiv.org/abs/1404.5997).
-  Hybrid partitioning is configured like,
-
-          # with other fields omitted
-          layer {
-            location: 1
-          }
-          layer {
-            location: 0
-          }
-          layer {
-            partition_dim: 0
-            location: 0
-          }
-          layer {
-            partition_dim: 1
-            location: 0
-          }
-
-Currently SINGA supports strategy-2 well. Other partitioning strategies are
-are under test and will be released in later version.
-
-## Parameter sharing
-
-Parameters can be shared in two cases,
-
-  * sharing parameters among layers via user configuration. For example, the
-  visible layer and hidden layer of a RBM shares the weight matrix, which is configured through
-  the `share_from` field as shown in the above RBM configuration. The
-  configurations must be the same (except name) for shared parameters.
-
-  * due to neural net partitioning, some `Param` objects are replicated into
-  different workers, e.g., partitioning one layer on batch dimension. These
-  workers share parameter values. SINGA controls this kind of parameter
-  sharing automatically, users do not need to do any configuration.
-
-  * the `NeuralNet` for training and testing (and validation) share most layers
-  , thus share `Param` values.
-
-If the shared `Param` instances resident in the same process (may in different
-threads), they use the same chunk of memory space for their values. But they
-would have different memory spaces for their gradients. In fact, their
-gradients will be averaged by the stub or server.
-
-## Advanced user guide
-
-### Creation
-
-    static NeuralNet* NeuralNet::Create(const NetProto& np, Phase phase, int num);
-
-The above function creates a `NeuralNet` for a given phase, and returns a
-pointer to the `NeuralNet` instance. The phase is in {kTrain,
-kValidation, kTest}. `num` is used for net partitioning which indicates the
-number of partitions.  Typically, a training job includes three neural nets for
-training, validation and test phase respectively. The three neural nets share most
-layers except the data layer, loss layer or output layer, etc.. The `Create`
-function takes in the full net configuration including layers for training,
-validation and test.  It removes layers for phases other than the specified
-phase based on the `exclude` field in
-[layer configuration](layer.html):
-
-    layer {
-      ...
-      exclude : kTest # filter this layer for creating test net
-    }
-
-The filtered net configuration is passed to the constructor of `NeuralNet`:
-
-    NeuralNet::NeuralNet(NetProto netproto, int npartitions);
-
-The constructor creates a graph representing the net structure firstly in
-
-    Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions);
-
-Next, it creates a layer for each node and connects layers if their nodes are
-connected.
-
-    void NeuralNet::CreateNetFromGraph(Graph* graph, int npartitions);
-
-Since the `NeuralNet` instance may be shared among multiple workers, the
-`Create` function returns a pointer to the `NeuralNet` instance .
-
-### Parameter sharing
-
- `Param` sharing
-is enabled by first sharing the Param configuration (in `NeuralNet::Create`)
-to create two similar (e.g., the same shape) Param objects, and then calling
-(in `NeuralNet::CreateNetFromGraph`),
-
-    void Param::ShareFrom(const Param& from);
-
-It is also possible to share `Param`s of two nets, e.g., sharing parameters of
-the training net and the test net,
-
-    void NeuralNet:ShareParamsFrom(NeuralNet* other);
-
-It will call `Param::ShareFrom` for each Param object.
-
-### Access functions
-`NeuralNet` provides a couple of access function to get the layers and params
-of the net:
-
-    const std::vector<Layer*>& layers() const;
-    const std::vector<Param*>& params() const ;
-    Layer* name2layer(string name) const;
-    Param* paramid2param(int id) const;
-
-
-### Partitioning
-
-
-#### Implementation
-
-SINGA partitions the neural net in `CreateGraph` function, which creates one
-node for each (partitioned) layer. For example, if one layer's partition
-dimension is 0 or 1, then it creates `npartition` nodes for it; if the
-partition dimension is -1, a single node is created, i.e., no partitioning.
-Each node is assigned a partition (or location) ID. If the original layer is
-configured with a location ID, then the ID is assigned to each newly created node.
-These nodes are connected according to the connections of the original layers.
-Some connection layers will be added automatically.
-For instance, if two connected sub-layers are located at two
-different workers, then a pair of bridge layers is inserted to transfer the
-feature (and gradient) blob between them. When two layers are partitioned on
-different dimensions, a concatenation layer which concatenates feature rows (or
-columns) and a slice layer which slices feature rows (or columns) would be
-inserted. These connection layers help making the network communication and
-synchronization transparent to the users.
-
-#### Dispatching partitions to workers
-
-Each (partitioned) layer is assigned a location ID, based on which it is dispatched to one
-worker. Particularly, the pointer to the `NeuralNet` instance is passed
-to every worker within the same group, but each worker only computes over the
-layers that have the same partition (or location) ID as the worker's ID.  When
-every worker computes the gradients of the entire model parameters
-(strategy-2), we refer to this process as data parallelism.  When different
-workers compute the gradients of different parameters (strategy-3 or
-strategy-1), we call this process model parallelism.  The hybrid partitioning
-leads to hybrid parallelism where some workers compute the gradients of the
-same subset of model parameters while other workers compute on different model
-parameters.  For example, to implement the hybrid parallelism in for the
-[DCNN model](http://arxiv.org/abs/1404.5997), we set `partition_dim = 0` for
-lower layers and `partition_dim = 1` for higher layers.
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/overview.rst
----------------------------------------------------------------------
diff --git a/doc/docs/overview.rst b/doc/docs/overview.rst
deleted file mode 100644
index 18ad62b..0000000
--- a/doc/docs/overview.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-Introduction
-==============
-
-
-SINGA is a general distributed deep learning platform for training big deep
-learning models over large datasets. It is designed with an intuitive
-programming model based on the layer abstraction. A variety
-of popular deep learning models are supported, namely feed-forward models including
-convolutional neural networks (CNN), energy models like restricted Boltzmann
-machine (RBM), and recurrent neural networks (RNN). Many built-in layers are
-provided for users. SINGA architecture is
-sufficiently flexible to run synchronous, asynchronous and hybrid training
-frameworks.  SINGA
-also supports different neural net partitioning schemes to parallelize the
-training of large models, namely partitioning on batch dimension, feature
-dimension or hybrid partitioning.
-
-
-Goals
------
-
-As a distributed system, the first goal of SINGA is to have good scalability. In other
-words, SINGA is expected to reduce the total training time to achieve certain
-accuracy with more computing resources (i.e., machines).
-
-
-The second goal is to make SINGA easy to use.
-It is non-trivial for programmers to develop and train models with deep and
-complex model structures.  Distributed training further increases the burden of
-programmers, e.g., data and model partitioning, and network communication.  Hence it is essential to
-provide an easy to use programming model so that users can implement their deep
-learning models/algorithms without much awareness of the underlying distributed
-platform.
-
-Principles
-----------
-
-Scalability is a challenging research problem for distributed deep learning
-training. SINGA provides a general architecture to exploit the scalability of
-different training frameworks. Synchronous training frameworks improve the
-efficiency of one training iteration, and
-asynchronous training frameworks improve the convergence rate. Given a fixed budget
-(e.g., cluster size), users can run a hybrid framework that maximizes the
-scalability by trading off between efficiency and convergence rate.
-
-SINGA comes with a programming model designed based on the layer abstraction, which
-is intuitive for deep learning models.  A variety of
-popular deep learning models can be expressed and trained using this programming model.
-
-System overview
----------------
-
-.. figure:: /image/sgd.png
-
-            Figure 1 - SGD flow
-
-Training a deep learning model is to find the optimal parameters involved in
-the transformation functions that generate good features for specific tasks.
-The goodness of a set of parameters is measured by a loss function, e.g.,
-`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the
-loss functions are usually non-linear and non-convex, it is difficult to get a
-closed form solution. Typically, people use the stochastic gradient descent
-(SGD) algorithm, which randomly
-initializes the parameters and then iteratively updates them to reduce the loss
-as shown in Figure 1.
-
-.. figure:: /image/overview.png
-
-           Figure 2 - SINGA overview
-
-SGD is used in SINGA to train
-parameters of deep learning models. The training workload is distributed over
-worker and server units as shown in Figure 2. In each
-iteration, every worker calls *TrainOneBatch* function to compute
-parameter gradients. *TrainOneBatch* takes a *NeuralNet* object
-representing the neural net, and visits layers of the *NeuralNet* in
-certain order. The resultant gradients are sent to the local stub that
-aggregates the requests and forwards them to corresponding servers for
-updating. Servers reply to workers with the updated parameters for the next
-iteration.
-
-
-Job submission
---------------
-
-To submit a job in SINGA (i.e., training a deep learning model),
-users pass the job configuration to SINGA driver in the
-`main function <programming-guide.html>`_ . The job configuration
-specifies the four major components in Figure 2,
-
-  * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections;
-  * a `TrainOneBatch <train-one-batch.html>`_  algorithm which is tailored for different model categories;
-  * an `Updater <updater.html>`_  defining the protocol for updating parameters at the server side;
-  * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers.
-
-This process is like the job submission in Hadoop, where users configure their
-jobs in the main function to set the mapper, reducer, etc.
-In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users
-can configure their jobs with their own (or built-in) layer, updater, etc.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
deleted file mode 100644
index c60b6a5..0000000
--- a/doc/docs/software_stack.md
+++ /dev/null
@@ -1,99 +0,0 @@
-# Software Stack
-
-SINGA's software stack includes three major components, namely, core, IO and
-model. Figure 1 illustrates these components together with the hardware.
-The core component provides memory management and tensor operations;
-IO has classes for reading (and writing) data from (to) disk and network; The
-model component provides data structures and algorithms for machine learning models,
-e.g., layers for neural network models, optimizers/initializer/metric/loss for
-general machine learning models.
-
-
-<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
-<br/>
-<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
-
-## Core
-
-[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. Tensor class represents a
-multi-dimensional array, which stores model variables and provides linear algebra
-operations for machine learning
-algorithms, including matrix multiplication and random functions. Each tensor
-instance (i.e. a tensor) is allocated on a Device instance.
-Each Device instance (i.e. a device) is created against one hardware device,
-e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
-tensor operations on its execution units, e.g. CPU threads or CUDA streams.
-
-Depending on the hardware and the programming language, SINGA have implemented
-the following specific device classes:
-
-* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
-* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
-* **OpenclGPU** represents normal GPU card from both Nvidia and AMD.
-  The execution units are the CommandQueues. Given that OpenCL is compatible with
-  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
-  extended for other devices.
-
-Different types of devices use different programming languages to write the kernel
-functions for tensor operations,
-
-* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
-* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
-* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
-
-In addition, different types of data, such as float32 and float16, could be supported by adding
-the corresponding tensor functions.
-
-Typically, users would create a device instance and pass it to create multiple
-tensor instances. When users call the Tensor functions, these function would invoke
-the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
-other words, the implementation of Tensor operations is transparent to users.
-
-Most machine learning algorithms could be expressed using (dense or sparse) tensors.
-Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
-including deep learning models and other traditional machine learning models.
-
-The Tensor and Device abstractions are extensible to support a wide range of hardware device
-using different programming languages. A new hardware device would be supported by
-adding a new Device subclass and the corresponding implementation of the Tensor
-operations (xxxMath).
-
-Optimizations in terms of speed and memory could be implemented by Device, which
-manages both operation execution and memory malloc/free. More optimization details
-would be described in the [Device page](device.html).
-
-
-## Model
-
-On top of the Tensor and Device abstractions, SINGA provides some higher level
-classes for machine learning modules.
-
-* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
-  functions for forward propagating features and backward propagating gradients w.r.t the training loss functions.
-  They wraps the complex layer operations so that users can easily create neural nets
-  by connecting a set of layers.
-
-* [Initializer](initializer.html) and its subclasses provide variant methods of initializing
-  model parameters (stored in Tensor instances), following Uniform, Gaussian, etc.
-
-* [Loss](loss.html) and its subclasses defines the training objective loss functions.
-  Both functions of computing the loss values and computing the gradient of the prediction w.r.t the
-  objective loss are implemented. Example loss functions include squared error and cross entropy.
-
-* [Metric](metric.html) and its subclasses provide the function to measure the
-  performance of the model, e.g., the accuracy.
-
-* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
-  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp etc.
-
-
-## IO
-
-The IO module consists of classes for data loading, data preprocessing and message passing.
-
-* Reader and its subclasses load string records from disk files
-* Writer and its subclasses write string records to disk files
-* Encoder and its subclasses encode Tensor instances into string records
-* Decoder and its subclasses decodes string records into Tensor instances
-* Endpoint represents a communication endpoint which provides functions for passing messages to each other.
-* Message represents communication messages between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
deleted file mode 100644
index 87d26ea..0000000
--- a/doc/docs/tensor.rst
+++ /dev/null
@@ -1,54 +0,0 @@
-Tensor
-========
-
-Each Tensor instance is a multi-dimensional array allocated on a specific
-Device instance. Tensor instances store variables and provide
-linear algebra operations over different types of hardware devices without user
-awareness. Note that users need to make sure the tensor operands are
-allocated on the same device except copy functions.
-
-
-Tensor implementation
----------------------
-
-SINGA has three different sets of implmentations of Tensor functions, one for each
-type of Device.
-
-* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppGPU devices.
-* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
-* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
-
-Python API
-----------
-
-There are two set of tensor functions,
-1. Tensor member functions, which would change the internal state of the Tensor instance.
-2. tensor module functions, which accepts Tensor instances as arguments and return
-Tensor instances.
-
-
-Create Tensor instances
-~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autoclass:: singa.tensor.Tensor
-
-
-Tensor instances can be constructed from Numpy array,
-
-.. automodule:: singa.tensor
-   :members: from_numpy
-
-
-Set Tensor values
-~~~~~~~~~~~~~~~~~
-
-
-
-
-
-
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
deleted file mode 100644
index 4b49d5f..0000000
--- a/doc/docs/zh/index.md
+++ /dev/null
@@ -1,9 +0,0 @@
-SINGA \u4e2d\u6587\u6587\u6863
-==============
-
-.. toctree::
-
-   overview
-   installation_source
-   programming-guide
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/downloads.md
----------------------------------------------------------------------
diff --git a/doc/downloads.md b/doc/downloads.md
deleted file mode 100644
index 31e7274..0000000
--- a/doc/downloads.md
+++ /dev/null
@@ -1,67 +0,0 @@
-## Download SINGA
----
-
-* Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa)
-
-* v0.3.0 (20 April 2016):
-    * [Apache SINGA 0.3.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz)
-      [\[MD5\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz.md5)
-      [\[KEYS\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/KEYS)
-    * [Release Notes 0.3.0](releases/RELEASE_NOTES_0.3.0.html)
-    * New features and major updates,
-        * [Training on GPU cluster](v0.3.0/gpu.html) enables training of deep learning models over a GPU cluster.
-        * [Python wrapper improvement](v0.3.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
-        * [New SGD updaters](v0.3.0/updater.html) are added, including Adam, AdaDelta and AdaMax.
-        * [Installation](v0.3.0/installation.html) has fewer dependent libraries for single node training.
-        * Heterogeneous training with CPU and GPU.
-        * Support cuDNN V4.
-        * Data prefetching.
-        * Fix some bugs.
-
-
-
-* v0.2.0 (14 January 2016):
-    * [Apache SINGA 0.2.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz)
-      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz.md5)
-      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/0.2.0/KEYS)
-    * [Release Notes 0.2.0](releases/RELEASE_NOTES_0.2.0.html)
-    * New features and major updates,
-        * [Training on GPU](v0.2.0/gpu.html) enables training of complex models on a single node with multiple GPU cards.
-        * [Hybrid neural net partitioning](v0.2.0/hybrid.html) supports data and model parallelism at the same time.
-        * [Python wrapper](v0.2.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
-        * [RNN model and BPTT algorithm](v0.2.0/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU.
-        * [Cloud software integration](v0.2.0/distributed-training.html) includes Mesos, Docker and HDFS.
-        * Visualization of neural net structure and layer information, which is helpful for debugging.
-        * Linear algebra functions and random functions against Blobs and raw data pointers.
-        * New layers, including SoftmaxLayer, ArgSortLayer, DummyLayer, RNN layers and cuDNN layers.
-        * Update Layer class to carry multiple data/grad Blobs.
-        * Extract features and test performance for new data by loading previously trained model parameters.
-        * Add Store class for IO operations.
-
-
-* v0.1.0 (8 October 2015):
-    * [Apache SINGA 0.1.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/apache-singa-incubating-0.1.0.tar.gz)
-      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/apache-singa-incubating-0.1.0.tar.gz.md5)
-      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/KEYS)
-    * [Amazon EC2 image](https://console.aws.amazon.com/ec2/v2/home?region=ap-southeast-1#LaunchInstanceWizard:ami=ami-b41001e6)
-    * [Release Notes 0.1.0](releases/RELEASE_NOTES_0.1.0.html)
-    * Major features include,
-        * Installation using GNU build utility
-        * Scripts for job management with zookeeper
-        * Programming model based on NeuralNet and Layer abstractions.
-        * System architecture based on Worker, Server and Stub.
-        * Training models from three different model categories, namely, feed-forward models, energy models and RNN models.
-        * Synchronous and asynchronous distributed training frameworks using CPU
-        * Checkpoint and restore
-        * Unit test using gtest
-
-**Disclaimer**
-
-Apache SINGA is an effort undergoing incubation at The Apache Software
-Foundation (ASF), sponsored by the name of Apache Incubator PMC. Incubation is
-required of all newly accepted projects until a further review indicates that
-the infrastructure, communications, and decision making process have stabilized
-in a manner consistent with other successful ASF projects. While incubation
-status is not necessarily a reflection of the completeness or stability of the
-code, it does indicate that the project has yet to be fully endorsed by the
-ASF.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/en/_templates/layout.html b/doc/en/_templates/layout.html
new file mode 100755
index 0000000..590e578
--- /dev/null
+++ b/doc/en/_templates/layout.html
@@ -0,0 +1,61 @@
+{#
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+#}
+{% extends "!layout.html" %}
+     
+{% block extrahead %}
+    <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
+{% endblock %}
+
+{% block footer %}
+
+<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
+<a href="http://incubator.apache.org/">
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+</a>
+
+  <span class="rst-current-version" data-toggle="rst-current-version">
+    <span class="fa fa-book"> incubator-singa </span>
+    v: {{ version }}
+    <span class="fa fa-caret-down"></span>
+  </span>
+  <div class="rst-other-versions">
+    <dl>
+       <dd><a href="">English</a></dd>
+       <dd><a href="{{pathto('zh/'+ 'index.html' , 1) }}">\u4e2d\u6587</a></dd>	  
+	  <!--dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
+	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
+	  <dd><a href="/it/latest/">Italiano</a></dd>
+	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd-->
+    </dl>
+    </dl>
+    <dl>
+      <dt>Versions</dt>
+      <dd><a href="/{{ language }}/latest/">latest</a></dd>
+      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
+    </dl>
+  </div>
+</div>
+
+ <a href="https://github.com/apache/incubator-singa">
+    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
+        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
+        alt="Fork me on GitHub">
+</a>
+
+{{ super() }}
+{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/issue-tracking.md
----------------------------------------------------------------------
diff --git a/doc/en/community/issue-tracking.md b/doc/en/community/issue-tracking.md
new file mode 100644
index 0000000..26b23dd
--- /dev/null
+++ b/doc/en/community/issue-tracking.md
@@ -0,0 +1,9 @@
+## Issue Tracking
+
+___
+
+SINGA uses [JIRA](https://www.atlassian.com/software/jira) a J2EE-based, issue tracking and project management application.
+
+Issues, bugs, and feature requests should be submitted to the following issue tracking system for this project.
+
+* https://issues.apache.org/jira/browse/singa

[51/51] [abbrv] incubator-singa git commit: Merge branch 'v1-rc0'

Posted by wa...@apache.org.

Merge branch 'v1-rc0'


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f9c6d5c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f9c6d5c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f9c6d5c0

Branch: refs/heads/master
Commit: f9c6d5c05ff2f5839af57017ccf843a8eafebb32
Parents: 1ca8c63 ed9587c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 18 01:59:02 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 18 01:59:02 2016 +0800

----------------------------------------------------------------------
 .gitignore                                      |    45 +-
 .gitmodules                                     |     3 +
 .travis.yml                                     |    21 +
 CMakeLists.txt                                  |    77 +
 Doxyfile                                        |  2383 --
 LICENSE                                         |    42 +-
 Makefile.am                                     |   381 -
 Makefile.example                                |   116 -
 Makefile.gpu                                    |   154 -
 NOTICE                                          |     2 +-
 README.md                                       |   224 +-
 RELEASE_NOTES                                   |    88 +
 autogen.sh                                      |    23 -
 cmake/Cuda.cmake                                |    42 +
 cmake/Dependencies.cmake                        |    91 +
 cmake/Protobuf.cmake                            |    31 +
 cmake/Templates/singa_config.h.in               |    45 +
 cmake/Thirdparty/FindCBLAS.cmake                |    29 +
 cmake/Thirdparty/FindCUDNN.cmake                |    51 +
 cmake/Thirdparty/FindGlog.cmake                 |    29 +
 cmake/Thirdparty/FindLMDB.cmake                 |    30 +
 cmake/Utils.cmake                               |    70 +
 conf/hostfile                                   |     1 -
 conf/profile                                    |     3 -
 conf/singa.conf                                 |     7 -
 configure.ac                                    |   311 -
 doc/Doxyfile                                    |  2383 ++
 doc/Readme.md                                   |     3 -
 doc/_static/apache.jpg                          |   Bin 0 -> 2310 bytes
 doc/_static/images/mlp-net.png                  |   Bin 0 -> 6905 bytes
 doc/_static/images/model-category.png           |   Bin 0 -> 18395 bytes
 doc/_static/images/overview.png                 |   Bin 0 -> 76106 bytes
 doc/_static/images/partition_fc.png             |   Bin 0 -> 23874 bytes
 doc/_static/images/rbm-rnn.png                  |   Bin 0 -> 15499 bytes
 doc/_static/images/sgd.png                      |   Bin 0 -> 15553 bytes
 doc/_static/images/singa.png                    |   Bin 0 -> 203695 bytes
 doc/_static/images/singav1-sw.png               |   Bin 0 -> 24326 bytes
 doc/_static/singa.png                           |   Bin 0 -> 25000 bytes
 doc/_static/style.css                           |     3 +
 doc/_templates/layout.html                      |    57 +
 doc/build.sh                                    |    38 +
 doc/conf.py                                     |   354 +
 doc/en/_templates/layout.html                   |    56 +
 doc/en/community/issue-tracking.md              |     9 +
 doc/en/community/mail-lists.rst                 |    28 +
 doc/en/community/source-repository.md           |    22 +
 doc/en/community/team-list.rst                  |    82 +
 doc/en/develop/contribute-code.md               |    60 +
 doc/en/develop/contribute-docs.md               |    28 +
 doc/en/develop/how-contribute.md                |    11 +
 doc/en/develop/schedule.rst                     |    57 +
 doc/en/docs.rst                                 |    23 +
 doc/en/docs/cnn.md                              |   141 +
 doc/en/docs/device.rst                          |    54 +
 doc/en/docs/index.rst                           |    33 +
 doc/en/docs/initializer.rst                     |    30 +
 doc/en/docs/installation.md                     |   233 +
 doc/en/docs/layer.rst                           |    32 +
 doc/en/docs/loss.rst                            |    25 +
 doc/en/docs/metric.rst                          |    26 +
 doc/en/docs/neural-net.md                       |   327 +
 doc/en/docs/optimizer.rst                       |    29 +
 doc/en/docs/software_stack.md                   |    99 +
 doc/en/docs/tensor.rst                          |    48 +
 doc/en/docs/utils.rst                           |    24 +
 doc/en/downloads.md                             |    66 +
 doc/en/index.rst                                |   124 +
 doc/en/releases/RELEASE_NOTES_0.1.0.md          |    99 +
 doc/en/releases/RELEASE_NOTES_0.2.0.md          |    84 +
 doc/en/releases/RELEASE_NOTES_0.3.0.md          |    37 +
 doc/zh/index.rst                                |    27 +
 examples/CMakeLists.txt                         |    20 +
 examples/alexnet/Makefile.example               |    29 -
 examples/alexnet/cudnn.conf                     |   448 -
 examples/alexnet/im2rec.cc                      |   157 -
 examples/alexnet/job.conf                       |   403 -
 examples/alexnet/rec2im_test.cc                 |   116 -
 examples/char-rnn/README.md                     |    33 +
 examples/char-rnn/data.py                       |    32 -
 examples/char-rnn/job.conf                      |   253 -
 examples/char-rnn/sample.conf                   |   212 -
 examples/char-rnn/sample.py                     |   102 +
 examples/char-rnn/train.py                      |   229 +
 examples/cifar10/CMakeLists.txt                 |    36 +
 examples/cifar10/Makefile.example               |    41 -
 examples/cifar10/README.md                      |    77 +
 examples/cifar10/alexnet-parallel.cc            |   265 +
 examples/cifar10/alexnet.cc                     |   203 +
 examples/cifar10/alexnet.py                     |    61 +
 examples/cifar10/cifar10.h                      |    98 +
 examples/cifar10/create_data.cc                 |   138 -
 examples/cifar10/cudnn.conf                     |   297 -
 examples/cifar10/cudnn_bm.conf                  |   376 -
 examples/cifar10/cudnn_hybrid.conf              |   306 -
 examples/cifar10/download_data.py               |    70 +
 examples/cifar10/hybrid.conf                    |   292 -
 examples/cifar10/job.conf                       |   279 -
 examples/cifar10/predict.py                     |    90 +
 examples/cifar10/resnet.py                      |    95 +
 examples/cifar10/run-parallel.sh                |    21 +
 examples/cifar10/run.sh                         |    20 +
 examples/cifar10/train.py                       |   186 +
 examples/cifar10/vgg-parallel.cc                |   327 +
 examples/cifar10/vgg.py                         |    94 +
 examples/imagenet/CMakeLists.txt                |    34 +
 examples/imagenet/README.md                     |    58 +
 examples/imagenet/alexnet.cc                    |   402 +
 examples/imagenet/create_data.sh                |    21 +
 examples/imagenet/ilsvrc12.cc                   |    70 +
 examples/imagenet/ilsvrc12.h                    |   380 +
 examples/imagenet/run.sh                        |    21 +
 examples/index.rst                              |    28 +
 examples/mnist/Makefile.example                 |    49 -
 examples/mnist/README.md                        |    18 +
 examples/mnist/conv.conf                        |   187 -
 examples/mnist/create_data.cc                   |   125 -
 examples/mnist/job.conf                         |   241 -
 examples/mnist/rbm_job.conf                     |    95 -
 examples/mnist/train.py                         |   133 +
 examples/rbm/autoencoder.conf                   |   229 -
 examples/rbm/rbm1.conf                          |   101 -
 examples/rbm/rbm2.conf                          |   122 -
 examples/rbm/rbm3.conf                          |   147 -
 examples/rbm/rbm4.conf                          |   167 -
 examples/rnnlm/Makefile.example                 |    52 -
 examples/rnnlm/README.md                        |    52 -
 examples/rnnlm/create_data.cc                   |   444 -
 examples/rnnlm/job.conf                         |   120 -
 examples/rnnlm/main.cc                          |    49 -
 examples/rnnlm/rnnlm.cc                         |   335 -
 examples/rnnlm/rnnlm.h                          |   158 -
 examples/rnnlm/rnnlm.proto                      |    53 -
 include/gtest/gtest-all.cc                      |  9592 --------
 include/gtest/gtest.h                           | 20061 -----------------
 include/gtest/gtest_main.cc                     |    38 -
 include/mshadow/cuda/cuda_reduce.cuh            |   117 -
 include/mshadow/cuda/tensor_gpu-inl.cuh         |   231 -
 include/mshadow/cxxnet_op.h                     |   127 -
 include/mshadow/tensor.h                        |   472 -
 include/mshadow/tensor_base.h                   |   298 -
 include/mshadow/tensor_container.h              |   152 -
 include/mshadow/tensor_cpu-inl.hpp              |   168 -
 include/mshadow/tensor_expr.h                   |   367 -
 include/mshadow/tensor_expr_engine-inl.hpp      |   416 -
 include/mshadow/tensor_expr_ext.h               |   978 -
 include/mshadow/tensor_gpu-inl.hpp              |   148 -
 include/mshadow/tensor_io.h                     |   137 -
 include/mshadow/tensor_random.h                 |   369 -
 include/mshadow/tensor_sse-inl.hpp              |   431 -
 include/singa/comm/msg.h                        |   243 -
 include/singa/comm/socket.h                     |   123 -
 include/singa/core/common.h                     |   121 +
 include/singa/core/device.h                     |   381 +
 include/singa/core/memory.h                     |    91 +
 include/singa/core/scheduler.h                  |    27 +
 include/singa/core/tensor.h                     |   466 +
 include/singa/driver.h                          |   264 -
 include/singa/io/decoder.h                      |    74 +
 include/singa/io/encoder.h                      |    73 +
 include/singa/io/hdfs_store.h                   |    58 -
 include/singa/io/hdfsfile.h                     |   131 -
 include/singa/io/imagefolder_store.h            |    21 -
 include/singa/io/integer.h                      |    73 +
 include/singa/io/kvfile.h                       |   182 -
 include/singa/io/kvfile_store.h                 |    56 -
 include/singa/io/network.h                      |   171 +
 include/singa/io/reader.h                       |   188 +
 include/singa/io/snapshot.h                     |    81 +
 include/singa/io/store.h                        |   111 -
 include/singa/io/textfile_store.h               |    57 -
 include/singa/io/transformer.h                  |    89 +
 include/singa/io/writer.h                       |   171 +
 include/singa/model/feed_forward_net.h          |   166 +
 include/singa/model/initializer.h               |   128 +
 include/singa/model/layer.h                     |   253 +
 include/singa/model/loss.h                      |   106 +
 include/singa/model/metric.h                    |    81 +
 include/singa/model/optimizer.h                 |   302 +
 include/singa/model/updater.h                   |    97 +
 include/singa/neuralnet/connection_layer.h      |   187 -
 include/singa/neuralnet/input_layer.h           |   336 -
 include/singa/neuralnet/layer.h                 |   376 -
 include/singa/neuralnet/loss_layer.h            |    83 -
 include/singa/neuralnet/neuralnet.h             |   173 -
 include/singa/neuralnet/neuron_layer.h          |   560 -
 include/singa/neuralnet/output_layer.h          |    99 -
 include/singa/server.h                          |   135 -
 include/singa/singa.h                           |    37 -
 include/singa/stub.h                            |   108 -
 include/singa/utils/blob.h                      |   414 -
 include/singa/utils/channel.h                   |    85 +
 include/singa/utils/cluster.h                   |   161 -
 include/singa/utils/cluster_rt.h                |   105 -
 include/singa/utils/common.h                    |   165 -
 include/singa/utils/context.h                   |   276 -
 include/singa/utils/cuda_utils.h                |   118 +-
 include/singa/utils/factory.h                   |    69 +-
 include/singa/utils/graph.h                     |   196 -
 include/singa/utils/image_transform.h           |    35 -
 include/singa/utils/integer.h                   |    73 +
 include/singa/utils/job_manager.h               |    79 -
 include/singa/utils/logging.h                   |   293 +
 include/singa/utils/math_addr.h                 |   279 -
 include/singa/utils/math_blob.h                 |   762 -
 include/singa/utils/math_kernel.h               |    88 -
 include/singa/utils/opencl_utils.h              |   144 +
 include/singa/utils/param.h                     |   407 -
 include/singa/utils/singa_op.h                  |   299 -
 include/singa/utils/singleton.h                 |    22 +-
 include/singa/utils/string.h                    |   101 +
 include/singa/utils/timer.h                     |    58 +
 include/singa/utils/tokenizer.h                 |    65 -
 include/singa/utils/updater.h                   |   173 -
 include/singa/utils/zk_service.h                |   116 -
 include/singa/worker.h                          |   340 -
 jenkins.sh                                      |    51 +
 lib/cnmem                                       |     1 +
 rat-excludes                                    |     1 -
 src/CMakeLists.txt                              |   135 +
 src/comm/msg.cc                                 |   265 -
 src/comm/socket.cc                              |   146 -
 src/core/device/cpp_cpu.cc                      |    64 +
 src/core/device/cuda_gpu.cc                     |   126 +
 src/core/device/device.cc                       |    74 +
 src/core/device/opencl_device.cc                |   248 +
 src/core/device/platform.cc                     |   141 +
 src/core/memory/memory.cc                       |   111 +
 src/core/scheduler/scheduler.cc                 |    19 +
 src/core/tensor/distribution.cl                 |  1020 +
 src/core/tensor/math_kernel.cu                  |   649 +
 src/core/tensor/math_kernel.h                   |   120 +
 src/core/tensor/sparse_tensor.cc                |    19 +
 src/core/tensor/tensor.cc                       |  1023 +
 src/core/tensor/tensor_math.h                   |   416 +
 src/core/tensor/tensor_math_cpp.h               |   705 +
 src/core/tensor/tensor_math_cuda.h              |   468 +
 src/core/tensor/tensor_math_opencl.cl           |   598 +
 src/core/tensor/tensor_math_opencl.h            |  1113 +
 src/driver.cc                                   |   402 -
 src/io/binfile_reader.cc                        |   136 +
 src/io/binfile_writer.cc                        |   112 +
 src/io/csv_decoder.cc                           |    55 +
 src/io/csv_encoder.cc                           |    43 +
 src/io/hdfsfile.cc                              |   135 -
 src/io/hdfsfile_store.cc                        |    75 -
 src/io/image_transformer.cc                     |   356 +
 src/io/jpg_decoder.cc                           |    75 +
 src/io/jpg_encoder.cc                           |    83 +
 src/io/kvfile.cc                                |   219 -
 src/io/kvfile_store.cc                          |    76 -
 src/io/lmdb_reader.cc                           |   118 +
 src/io/lmdb_writer.cc                           |   133 +
 src/io/network/endpoint.cc                      |   831 +
 src/io/network/message.cc                       |    95 +
 src/io/snapshot.cc                              |   106 +
 src/io/store.cc                                 |    70 -
 src/io/textfile_reader.cc                       |    69 +
 src/io/textfile_store.cc                        |    89 -
 src/io/textfile_writer.cc                       |    61 +
 src/main.cc                                     |    79 -
 src/model/feed_forward_net.cc                   |   302 +
 src/model/layer/activation.cc                   |    87 +
 src/model/layer/activation.h                    |    57 +
 src/model/layer/batchnorm.cc                    |   200 +
 src/model/layer/batchnorm.h                     |    90 +
 src/model/layer/convolution.cc                  |   232 +
 src/model/layer/convolution.h                   |    98 +
 src/model/layer/cudnn_activation.cc             |   121 +
 src/model/layer/cudnn_activation.h              |    59 +
 src/model/layer/cudnn_batchnorm.cc              |   230 +
 src/model/layer/cudnn_batchnorm.h               |    58 +
 src/model/layer/cudnn_convolution.cc            |   251 +
 src/model/layer/cudnn_convolution.h             |    73 +
 src/model/layer/cudnn_dropout.cc                |   116 +
 src/model/layer/cudnn_dropout.h                 |    62 +
 src/model/layer/cudnn_lrn.cc                    |    95 +
 src/model/layer/cudnn_lrn.h                     |    54 +
 src/model/layer/cudnn_pooling.cc                |   132 +
 src/model/layer/cudnn_pooling.h                 |    57 +
 src/model/layer/cudnn_rnn.cc                    |   427 +
 src/model/layer/cudnn_rnn.h                     |    88 +
 src/model/layer/cudnn_softmax.cc                |   102 +
 src/model/layer/cudnn_softmax.h                 |    59 +
 src/model/layer/cudnn_utils.h                   |    86 +
 src/model/layer/dense.cc                        |    94 +
 src/model/layer/dense.h                         |    76 +
 src/model/layer/dropout.cc                      |    65 +
 src/model/layer/dropout.h                       |    67 +
 src/model/layer/flatten.cc                      |    57 +
 src/model/layer/flatten.h                       |    56 +
 src/model/layer/lrn.cc                          |   151 +
 src/model/layer/lrn.h                           |    73 +
 src/model/layer/merge.cc                        |    63 +
 src/model/layer/merge.h                         |    53 +
 src/model/layer/pooling.cc                      |   295 +
 src/model/layer/pooling.h                       |    90 +
 src/model/layer/prelu.cc                        |   149 +
 src/model/layer/prelu.h                         |    66 +
 src/model/layer/rnn.cc                          |   103 +
 src/model/layer/rnn.h                           |    96 +
 src/model/layer/softmax.cc                      |    74 +
 src/model/layer/softmax.h                       |    48 +
 src/model/layer/split.cc                        |    53 +
 src/model/layer/split.h                         |    55 +
 src/model/loss/mse.cc                           |    42 +
 src/model/loss/softmax_cross_entropy.cc         |    56 +
 src/model/metric/accuracy.cc                    |    64 +
 src/model/optimizer/adagrad.cc                  |    43 +
 src/model/optimizer/local_all_reduce.cc         |    25 +
 src/model/optimizer/nesterov.cc                 |    51 +
 src/model/optimizer/optimizer.cc                |   120 +
 src/model/optimizer/rmsprop.cc                  |    46 +
 src/model/optimizer/sgd.cc                      |    54 +
 src/model/rnn.cc                                |    27 +
 src/model/updater/local_updater.cc              |    77 +
 src/model/updater/updater.cc                    |    32 +
 src/neuralnet/connection_layer/bridge.cc        |   108 -
 src/neuralnet/connection_layer/concate.cc       |   118 -
 src/neuralnet/connection_layer/rnn_dummy.cc     |    67 -
 src/neuralnet/connection_layer/slice.cc         |   166 -
 src/neuralnet/connection_layer/split.cc         |    91 -
 src/neuralnet/input_layer/char_rnn.cc           |    93 -
 src/neuralnet/input_layer/csv.cc                |    67 -
 src/neuralnet/input_layer/deprecated.cc         |   373 -
 src/neuralnet/input_layer/image_preprocess.cc   |    78 -
 src/neuralnet/input_layer/onehot.cc             |    40 -
 src/neuralnet/input_layer/record.cc             |    73 -
 src/neuralnet/input_layer/rnn_label.cc          |    35 -
 src/neuralnet/input_layer/store.cc              |   162 -
 src/neuralnet/layer.cc                          |    82 -
 src/neuralnet/loss_layer/cudnn_softmaxloss.cc   |    83 -
 src/neuralnet/loss_layer/euclidean.cc           |    80 -
 src/neuralnet/loss_layer/softmax.cc             |   112 -
 src/neuralnet/neuralnet.cc                      |   644 -
 src/neuralnet/neuron_layer/activation.cc        |    87 -
 src/neuralnet/neuron_layer/bm.cc                |    64 -
 src/neuralnet/neuron_layer/convolution.cc       |   192 -
 src/neuralnet/neuron_layer/cudnn_activation.cc  |   108 -
 src/neuralnet/neuron_layer/cudnn_bm.cc          |   149 -
 src/neuralnet/neuron_layer/cudnn_convolution.cc |   221 -
 src/neuralnet/neuron_layer/cudnn_lrn.cc         |    87 -
 src/neuralnet/neuron_layer/cudnn_pooling.cc     |    95 -
 src/neuralnet/neuron_layer/cudnn_softmax.cc     |    76 -
 src/neuralnet/neuron_layer/dropout.cc           |    62 -
 src/neuralnet/neuron_layer/dummy.cc             |   102 -
 src/neuralnet/neuron_layer/embedding.cc         |    98 -
 src/neuralnet/neuron_layer/gru.cc               |   258 -
 src/neuralnet/neuron_layer/inner_product.cc     |    89 -
 src/neuralnet/neuron_layer/lrn.cc               |    75 -
 src/neuralnet/neuron_layer/pooling.cc           |   146 -
 src/neuralnet/neuron_layer/rbm.cc               |   200 -
 src/neuralnet/neuron_layer/relu.cc              |    51 -
 src/neuralnet/neuron_layer/sigmoid.cc           |    51 -
 src/neuralnet/neuron_layer/softmax.cc           |    70 -
 src/neuralnet/neuron_layer/stanh.cc             |    48 -
 src/neuralnet/output_layer/accuracy.cc          |    61 -
 src/neuralnet/output_layer/argsort.cc           |    56 -
 src/neuralnet/output_layer/char_rnn.cc          |    51 -
 src/neuralnet/output_layer/csv.cc               |    59 -
 src/neuralnet/output_layer/record.cc            |    56 -
 src/proto/common.proto                          |   114 -
 src/proto/core.proto                            |    76 +
 src/proto/io.proto                              |    58 +
 src/proto/job.proto                             |   816 -
 src/proto/model.proto                           |   956 +
 src/proto/singa.proto                           |    29 -
 src/python/setup.py.in                          |    98 +
 src/python/singa/__init__.py                    |    19 +
 src/python/singa/command.py                     |   240 +
 src/python/singa/device.py                      |   123 +
 src/python/singa/initializer.py                 |   122 +
 src/python/singa/layer.py                       |   933 +
 src/python/singa/loss.py                        |   141 +
 src/python/singa/metric.py                      |    85 +
 src/python/singa/model.py                       |    21 +
 src/python/singa/net.py                         |   213 +
 src/python/singa/optimizer.py                   |   377 +
 src/python/singa/tensor.py                      |  1011 +
 src/python/singa/utils.py                       |    47 +
 src/python/swig/config.i.in                     |     4 +
 src/python/swig/core_device.i                   |    69 +
 src/python/swig/core_tensor.i                   |   371 +
 src/python/swig/model_layer.i                   |   102 +
 src/python/swig/model_loss.i                    |    62 +
 src/python/swig/model_metric.i                  |    43 +
 src/python/swig/model_optimizer.i               |    70 +
 src/python/swig/numpy.i                         |  3119 +++
 src/python/swig/singa.i                         |    31 +
 src/server.cc                                   |   259 -
 src/stub.cc                                     |   282 -
 src/test/test_cluster.cc                        |   143 -
 src/test/test_common.cc                         |   133 -
 src/test/test_connection_layers.cc              |   459 -
 src/test/test_context.cc                        |    76 -
 src/test/test_csv_input_layer.cc                |    92 -
 src/test/test_gru_layer.cc                      |   287 -
 src/test/test_kvfile.cc                         |    85 -
 src/test/test_math.cc                           |  1033 -
 src/test/test_msg.cc                            |   102 -
 src/test/test_neuralnet.cc                      |   116 -
 src/test/test_paramslicer.cc                    |    70 -
 src/test/test_record_input_layer.cc             |   122 -
 src/test/test_store.cc                          |    92 -
 src/test/test_unrolling.cc                      |   373 -
 src/utils/blob.cc                               |   259 -
 src/utils/channel.cc                            |   104 +
 src/utils/cluster.cc                            |   131 -
 src/utils/cluster_rt.cc                         |   110 -
 src/utils/common.cc                             |   574 -
 src/utils/graph.cc                              |   273 -
 src/utils/image_transform.cc                    |    57 -
 src/utils/job_manager.cc                        |   271 -
 src/utils/logging.cc                            |   170 +
 src/utils/math_kernel.cu                        |   450 -
 src/utils/opencl_utils.cc                       |    63 +
 src/utils/param.cc                              |   447 -
 src/utils/tool.cc                               |   169 -
 src/utils/updater.cc                            |   284 -
 src/utils/zk_service.cc                         |   326 -
 src/worker.cc                                   |   545 -
 test/CMakeLists.txt                             |    47 +
 test/gtest/CMakeLists.txt                       |    19 +
 test/gtest/gtest-all.cc                         |  9592 ++++++++
 test/gtest/gtest.h                              | 20061 +++++++++++++++++
 test/gtest/gtest_main.cc                        |    38 +
 test/python/test_layer.py                       |   213 +
 test/python/test_optimizer.py                   |   104 +
 test/python/test_tensor.py                      |   137 +
 test/singa/test_accuracy.cc                     |    35 +
 test/singa/test_activation.cc                   |   136 +
 test/singa/test_adagrad.cc                      |    96 +
 test/singa/test_batchnorm.cc                    |   132 +
 test/singa/test_binfile_rw.cc                   |   133 +
 test/singa/test_channel.cc                      |    39 +
 test/singa/test_convolution.cc                  |   208 +
 test/singa/test_cpp_cpu.cc                      |    72 +
 test/singa/test_cross_entropy.cc                |   116 +
 test/singa/test_csv.cc                          |    60 +
 test/singa/test_cudnn_activation.cc             |   134 +
 test/singa/test_cudnn_batchnorm.cc              |   240 +
 test/singa/test_cudnn_convolution.cc            |   371 +
 test/singa/test_cudnn_dropout.cc                |   126 +
 test/singa/test_cudnn_lrn.cc                    |   203 +
 test/singa/test_cudnn_pooling.cc                |   131 +
 test/singa/test_cudnn_rnn.cc                    |   181 +
 test/singa/test_cudnn_softmax.cc                |   169 +
 test/singa/test_dense.cc                        |   243 +
 test/singa/test_dropout.cc                      |   101 +
 test/singa/test_ep.cc                           |   113 +
 test/singa/test_flatten.cc                      |   143 +
 test/singa/test_image_transformer.cc            |   261 +
 test/singa/test_initializer.cc                  |   148 +
 test/singa/test_jpg.cc                          |   100 +
 test/singa/test_layer.cc                        |    46 +
 test/singa/test_lmdb_rw.cc                      |   140 +
 test/singa/test_logging.cc                      |    64 +
 test/singa/test_lrn.cc                          |   116 +
 test/singa/test_memory.cc                       |    99 +
 test/singa/test_mse.cc                          |   109 +
 test/singa/test_nesterov.cc                     |   101 +
 test/singa/test_opencl.cc                       |   629 +
 test/singa/test_platform.cc                     |    97 +
 test/singa/test_pooling.cc                      |   141 +
 test/singa/test_prelu.cc                        |   249 +
 test/singa/test_rmsprop.cc                      |   105 +
 test/singa/test_sgd.cc                          |   150 +
 test/singa/test_snapshot.cc                     |   123 +
 test/singa/test_softmax.cc                      |   103 +
 test/singa/test_tensor.cc                       |   131 +
 test/singa/test_tensor_math.cc                  |   901 +
 test/singa/test_textfile_rw.cc                  |   133 +
 test/singa/test_timer.cc                        |    30 +
 tool/python/README.md                           |   375 -
 tool/python/examples/__init__.py                |    22 -
 tool/python/examples/cifar10_cnn.py             |    55 -
 tool/python/examples/cifar10_cnn_cudnn.py       |    57 -
 tool/python/examples/cifar10_cnn_parameter.py   |    57 -
 tool/python/examples/datasets/__init__.py       |    22 -
 tool/python/examples/datasets/cifar10.py        |    57 -
 tool/python/examples/datasets/mnist.py          |    55 -
 tool/python/examples/mnist_ae.py                |    48 -
 tool/python/examples/mnist_mlp.py               |    55 -
 tool/python/examples/mnist_mlp_parameter.py     |    50 -
 tool/python/examples/mnist_mlp_test.py          |    52 -
 tool/python/examples/mnist_rbm1.py              |    46 -
 tool/python/examples/mnist_rbm2.py              |    47 -
 tool/python/examples/mnist_rbm3.py              |    47 -
 tool/python/examples/mnist_rbm4.py              |    47 -
 tool/python/examples/train_cifar10.py           |   142 -
 tool/python/examples/train_mnist.py             |   117 -
 tool/python/singa.py                            |    46 -
 tool/python/singa/__init__.py                   |    22 -
 tool/python/singa/driver.i                      |   117 -
 tool/python/singa/generatepy.sh                 |    26 -
 tool/python/singa/initializations.py            |    67 -
 tool/python/singa/layer.py                      |   693 -
 tool/python/singa/model.py                      |   716 -
 tool/python/singa/parameter.py                  |   140 -
 tool/python/singa/utils/__init__.py             |    22 -
 tool/python/singa/utils/message.py              |    80 -
 tool/python/singa/utils/utility.py              |    86 -
 501 files changed, 73635 insertions(+), 69831 deletions(-)
----------------------------------------------------------------------

[39/51] [abbrv] incubator-singa git commit: Merge updates for docs and bug-fix for pool.cc and tensor.to_numpy()

Posted by wa...@apache.org.

Merge updates for docs and bug-fix for pool.cc and tensor.to_numpy()


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d2300ae2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d2300ae2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d2300ae2

Branch: refs/heads/master
Commit: d2300ae258c77ce488734efe98dbaf69f3210ff9
Parents: a144a61 22889bc
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Aug 16 15:41:33 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Tue Aug 16 15:41:33 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py | 36 +++++++++++++++++++-----------------
 src/model/layer/pooling.cc | 17 +++++++++--------
 src/python/singa/tensor.py | 16 ++++++++++------
 3 files changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------

[24/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Update installation.md.
Update flags in MacOS.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/30731ee4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/30731ee4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/30731ee4

Branch: refs/heads/master
Commit: 30731ee412f85c80f2d8ce81671eaa7a84d53524
Parents: d3a57cf
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Mon Aug 15 16:42:25 2016 +0800
Committer: xiezl <xi...@comp.nus.edu.sg>
Committed: Mon Aug 15 16:42:25 2016 +0800

----------------------------------------------------------------------
 doc/docs/installation.md | 20 +++++++++++---------
 test/CMakeLists.txt      |  5 ++++-
 2 files changed, 15 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/30731ee4/doc/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/docs/installation.md b/doc/docs/installation.md
index 8ab617f..4cf4ea7 100755
--- a/doc/docs/installation.md
+++ b/doc/docs/installation.md
@@ -36,24 +36,29 @@ which could be downloaded as
     $ git submodule update
 
 
-### Linux OS
+### Linux & MacOS 
+
+GCC (>=4.8.1) is required to compile SINGA on Linux.
+You can use gcc compiler to do the installation on MacOS following the
+steps in Linux installation. 
+In addition, you can also install singa via clang compiler following the
+commands in this section.
 
-GCC (>=4.8.1) is required to compile SINGA on Linux OS.
 In SINGA_ROOT, execute the following commands for compiling SINGA,
 
     $ mkdir build && cd build
-    # generate Makefile for compilation
     $ cmake ..
-    # compile SINGA
     $ make
 
-Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
+Note that if you are using CUDNN and it is not installed under system default
+folder, you need to let cmake know the paths to CUDNN,
 
     $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
     $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
 
 You can use `ccmake ..` to configure the compilation options including using
-LMDB, GLOG, etc.
+LMDB, GLOG, etc. In addition, you can set the proper search paths for the
+dependent libraries.
 
 After compiling SINGA, you can run the unit tests by
 
@@ -63,7 +68,4 @@ You can see all the testing cases with testing results. If SINGA passes all
 tests, then you have successfully installed SINGA. Please proceed to try the examples!
 
 
-### MacOS
-
-
 ### Windows

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/30731ee4/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f196928..6e7dd84 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -23,5 +23,8 @@ ADD_DEPENDENCIES(test_singa singa_core singa_utils)
 #MESSAGE(STATUS "link libs" ${singa_linker_libs})
 TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
     singa_io proto protobuf ${SINGA_LINKER_LIBS})
-SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread ")
+IF(UNIX AND (NOT APPLE))
+    LIST(APPEND LINK_FLAGS "-pthread")
+ENDIF()
+SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS}")

[48/51] [abbrv] incubator-singa git commit: Preparing for V1.0 RC0.

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/mnist_rbm4.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/mnist_rbm4.py b/tool/python/examples/mnist_rbm4.py
deleted file mode 100755
index 8343b4f..0000000
--- a/tool/python/examples/mnist_rbm4.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from examples.datasets import mnist
-
-rbmid = 4
-pvalues = {'batchsize' : 100, 'shape' : 784, 'std_value' : 255}
-X_train, X_test, workspace = mnist.load_data(
-            workspace = 'examples/rbm/rbm'+str(rbmid),
-            nb_rbm = rbmid,
-            checkpoint_steps = 6000,
-            **pvalues)
-
-m = Energy('rbm'+str(rbmid), sys.argv)
-
-out_dim = [1000, 500, 250, 30]
-m.add(RBM(out_dim, sampling='gaussian', w_std=0.1, b_wd=0))
-
-sgd = SGD(lr=0.001, decay=0.0002, momentum=0.8)
-topo = Cluster(workspace)
-m.compile(optimizer=sgd, cluster=topo)
-m.fit(X_train, alg='cd', nb_epoch=6000)
-#result = m.evaluate(X_test, test_steps=100, test_freq=500)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/train_cifar10.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/train_cifar10.py b/tool/python/examples/train_cifar10.py
deleted file mode 100755
index e8ac973..0000000
--- a/tool/python/examples/train_cifar10.py
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-Example script of CNN model for CIFAR10 dataset
-'''
-import os, sys
-import numpy as np
-
-current_path_ = os.path.dirname(__file__)
-singa_root_ = os.path.abspath(os.path.join(current_path_,'../../..'))
-sys.path.append(os.path.join(singa_root_,'tool','python'))
-
-from singa.driver import Driver
-from singa.layer import *
-from singa.model import *
-
-
-'''
-CIFAR10 dataset can be downloaded at [https://www.cs.toronto.edu/~kriz/cifar.html]
-- please specify dataset_dir
-'''
-dataset_dir_ = singa_root_ + "/tool/python/examples/datasets/cifar-10-batches-py"
-mean_image = None
-
-def unpickle(file):
-    ''' This method loads dataset provided at CIFAR10 website
-        See [https://www.cs.toronto.edu/~kriz/cifar.html] for more details
-    '''
-    import cPickle
-    fo = open(file, 'rb')
-    dict = cPickle.load(fo)
-    fo.close()
-    return dict
-
-def compute_mean_image():
-    ''' This is a sample script to cmopute the average image
-        of all samples in 5 dataset of cifar10
-    '''
-    mean = None
-    nb_samples_total = 0
-    for did in range(1,6):
-        fname_train_data = dataset_dir_ + "/data_batch_{}".format(did)
-        cifar10 = unpickle(fname_train_data)
-        image = cifar10['data'].astype(dtype=np.uint8)
-        if did > 1:
-            image = np.vstack((image, image))
-    return np.average(image, axis=0)
-
-def load_dataset(did=1):
-    ''' CIFAR10 dataset includes
-        5 binary dataset, each contains 10000 images
-        1 row (1 image) includes 1 label & 3072 pixels
-        3072 pixels are  3 channels of a 32x32 image
-    '''
-    assert mean_image is not None, 'mean_image is required'
-    print '[Load CIFAR10 dataset {}]'.format(did)
-    fname_train_data = dataset_dir_ + "/data_batch_{}".format(did)
-    cifar10 = unpickle(fname_train_data)
-    image = cifar10['data'].astype(dtype=np.uint8)
-    image = image - mean_image
-    print '  image x:', image.shape
-    label = np.asarray(cifar10['labels'], dtype=np.uint8)
-    label = label.reshape(label.size, 1)
-    print '  label y:', label.shape
-    return image, label
-
-#-------------------------------------------------------------------
-mean_image = compute_mean_image()
-# mean_image = np.fromfile('tool/python/examples/datasets/cifar10_mean_image')
-
-print '[Layer registration/declaration]'
-d = Driver()
-d.Init(sys.argv)
-
-input = ImageInput(32, 32, 3) # image width, height, channel
-label = LabelInput()
-
-nn = []
-nn.append(input)
-nn.append(Convolution2D(32, 5, 1, 2, w_std=0.0001, b_lr=2))
-nn.append(MaxPooling2D(pool_size=(3,3), stride=2))
-nn.append(Activation('relu'))
-nn.append(LRN2D(3, alpha=0.00005, beta=0.75))
-nn.append(Convolution2D(32, 5, 1, 2, b_lr=2))
-nn.append(Activation('relu'))
-nn.append(AvgPooling2D(pool_size=(3,3), stride=2))
-nn.append(LRN2D(3, alpha=0.00005, beta=0.75))
-nn.append(Convolution2D(64, 5, 1, 2))
-nn.append(Activation('relu'))
-nn.append(AvgPooling2D(pool_size=(3,3), stride=2))
-nn.append(Dense(10, w_wd=250, b_lr=2, b_wd=0))
-loss = Loss('softmaxloss')
-
-# updater
-sgd = SGD(decay=0.004, momentum=0.9, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
-
-#-------------------------------------------------------------------
-batchsize = 100
-disp_freq = 50
-train_step = 1000
-
-print '[Start training]'
-for dataset_id in range(train_step / batchsize):
-
-    x, y = load_dataset(dataset_id%5+1)
-
-    for i in range(x.shape[0] / batchsize):
-        xb, yb = x[i*batchsize:(i+1)*batchsize,:], y[i*batchsize:(i+1)*batchsize,:]
-        nn[0].Feed(xb)
-        label.Feed(yb)
-        for h in range(1, len(nn)):
-            nn[h].ComputeFeature(nn[h-1])
-        loss.ComputeFeature(nn[-1], label)
-        if (i+1)%disp_freq == 0:
-            print '  Step {:>3}: '.format(i+1 + dataset_id*(x.shape[0]/batchsize)),
-            loss.display()
-
-        loss.ComputeGradient()
-        for h in range(len(nn)-1, 0, -1):
-            nn[h].ComputeGradient()
-            sgd.Update(i+1, nn[h])

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/examples/train_mnist.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/train_mnist.py b/tool/python/examples/train_mnist.py
deleted file mode 100755
index b8e6217..0000000
--- a/tool/python/examples/train_mnist.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-Example script of MLP model for MNIST dataset
-'''
-import os, sys
-import numpy as np
-
-current_path_ = os.path.dirname(__file__)
-singa_root_=os.path.abspath(os.path.join(current_path_,'../../..'))
-sys.path.append(os.path.join(singa_root_,'tool','python'))
-
-from singa.driver import Driver
-from singa.layer import *
-from singa.model import *
-
-def swap32(x):
-    return (((x << 24) & 0xFF000000) |
-            ((x <<  8) & 0x00FF0000) |
-            ((x >>  8) & 0x0000FF00) |
-            ((x >> 24) & 0x000000FF))
-
-def load_dataset():
-    ''' MNIST dataset
-        train-images: 4 int32 headers & int8 pixels
-        train-labels: 2 int32 headers & int8 labels
-    '''
-    print '[Load MNIST dataset]'
-    fname_train_image = "examples/mnist/train-images-idx3-ubyte"
-    fname_train_label = "examples/mnist/train-labels-idx1-ubyte"
-    nb_header = [4, 2]
-
-    info = swap32(np.fromfile(fname_train_image, dtype=np.uint32, count=nb_header[0]))
-    nb_samples = info[1] 
-    shape = (info[2],info[3])
-    
-    x = np.fromfile(fname_train_image, dtype=np.uint8)
-    x = x[np.dtype(np.int32).itemsize*nb_header[0]:] # skip header
-    x = x.reshape(nb_samples, shape[0]*shape[1]) 
-    print '   data x:', x.shape
-    y = np.fromfile(fname_train_label, dtype=np.uint8)
-    y = y[np.dtype(np.int32).itemsize*nb_header[1]:] # skip header
-    y = y.reshape(nb_samples, 1) 
-    print '  label y:', y.shape
-
-    return x, y
-
-#-------------------------------------------------------------------
-print '[Layer registration/declaration]'
-d = Driver()
-d.Init(sys.argv)
-
-input = ImageInput(28, 28)
-label = LabelInput()
-
-nn = []
-nn.append(input)
-nn.append(Dense(2500, init='uniform'))
-nn.append(Activation('stanh'))
-nn.append(Dense(2000, init='uniform'))
-nn.append(Activation('stanh'))
-nn.append(Dense(1500, init='uniform'))
-nn.append(Activation('stanh'))
-nn.append(Dense(1000, init='uniform'))
-nn.append(Activation('stanh'))
-nn.append(Dense(500, init='uniform'))
-nn.append(Activation('stanh'))
-nn.append(Dense(10, init='uniform'))
-loss = Loss('softmaxloss')
-
-# updater
-sgd = SGD(lr=0.001, lr_type='step')
-
-#-------------------------------------------------------------------
-batchsize = 64 
-disp_freq = 10
-
-x, y = load_dataset()
-
-print '[Start training]'
-for i in range(x.shape[0] / batchsize):
-    xb, yb = x[i*batchsize:(i+1)*batchsize,:], y[i*batchsize:(i+1)*batchsize,:]
-    nn[0].Feed(xb)
-    label.Feed(yb)
-    for h in range(1, len(nn)):
-        nn[h].ComputeFeature(nn[h-1])
-    loss.ComputeFeature(nn[-1], label)
-    if (i+1)%disp_freq == 0:
-        print '  Step {:>3}: '.format(i+1),
-        loss.display()
-
-    loss.ComputeGradient()
-    for h in range(len(nn)-1, 0, -1):
-        nn[h].ComputeGradient()
-        sgd.Update(i+1, nn[h])
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa.py
----------------------------------------------------------------------
diff --git a/tool/python/singa.py b/tool/python/singa.py
deleted file mode 100755
index e44e94d..0000000
--- a/tool/python/singa.py
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-import os
-import sys
-import string
-import pb2.job_pb2 as job_pb2
-import singa.driver as driver
-from google.protobuf.text_format import Merge
-
-if __name__ == '__main__':
-    """Invoke the training program using this python script.
-    ./bin/singa-run.sh -exec tool/python/singa.py -conf examples/cifar10/job.conf
-    """
- 
-    i = sys.argv.index('-conf')
-    s = open(sys.argv[i+1], 'r').read()
-    s = str(s)
-    j = job_pb2.JobProto()
-    Merge(s, j)
-    b = j.SerializeToString()
-    d = driver.Driver()
-    d.InitLog(sys.argv[0])
-    d.Init(sys.argv)
-    d.Train(False, b)
-    #d.Test(b)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/__init__.py b/tool/python/singa/__init__.py
deleted file mode 100644
index a796a7a..0000000
--- a/tool/python/singa/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/driver.i
----------------------------------------------------------------------
diff --git a/tool/python/singa/driver.i b/tool/python/singa/driver.i
deleted file mode 100644
index 63f2287..0000000
--- a/tool/python/singa/driver.i
+++ /dev/null
@@ -1,117 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-/*interface file for swig */
-
-%module driver
-%include "std_vector.i"
-%include "std_string.i"
-%include "argcargv.i"
-%include "carrays.i"
-%array_class(float, floatArray);
-
-%apply (int ARGC, char **ARGV) { (int argc, char **argv)  }
-%{
-#include "singa/driver.h"
-#include "singa/worker.h"
-#include "singa/neuralnet/layer.h"
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/neuralnet/loss_layer.h"
-#include "singa/utils/blob.h"
-#include "singa/utils/param.h"
-#include "singa/utils/updater.h"
-#include "singa/proto/job.pb.h"
-#include "singa/proto/common.pb.h"
-%}
-
-namespace std {
-  %template(strVector) vector<string>;
-  %template(intVector) vector<int>;
-  %template(floatVector) vector<float>;
-  %template(layerVector) vector<singa::Layer*>;
-  %template(paramVector) vector<singa::Param*>;
-}
-
-namespace singa{
-  class Driver{
-    public:
-    void Train(bool resume, const std::string job_conf);
-    void Init(int argc, char **argv);
-    void InitLog(char* arg);
-    void Test(const std::string job_conf);
-  };
-
-  %nodefault Worker;
-  class Worker{
-    public:
-      static singa::Worker* CreateWorker(const std::string str);
-      void InitNetParams(const std::string& folder, std::vector<singa::Layer*> net);
-      void Checkpoint(int step, const std::string& folder, std::vector<singa::Layer*> net);
-  };
-    
-  class DummyLayer{
-    public:
-      void Setup(const std::string str, const std::vector<singa::Layer*>& srclayers);
-      void Feed(int batchsize, std::vector<float>& data, std::vector<int>& aux_data);
-      singa::Layer* ToLayer();
-  };
-
-  %nodefault Layer;
-  class Layer{
-    public:
-      static singa::Layer* CreateLayer(const std::string str);
-      static void SetupLayer(singa::Layer* layer, const std::string str, const std::vector<singa::Layer*>& srclayers);
-      virtual void ComputeFeature(int flag, const std::vector<singa::Layer*>& srclayers); 
-      virtual void ComputeGradient(int flag, const std::vector<singa::Layer*>& srclayers);
-      virtual const singa::Blob<float>& data(const singa::Layer* from); 
-      virtual const std::vector<singa::Param*> GetParams();
-      virtual const std::string ToString(bool debug, int flag);
-      void SetParams(std::vector<singa::Param*> params);
-  };
-
-  %nodefault Updater;
-  class Updater{
-    public:
-      static singa::Updater* CreateUpdater(const std::string str);
-      virtual void Update(int step, singa::Param* param, float grad_scale);
-  };
-
-  template <typename Dtype>
-  class Blob{
-    public:
-      inline int count();
-      inline const std::vector<int>& shape();
-      inline Dtype* mutable_cpu_data(); 
-      inline const Dtype* cpu_data();
-  };
-
-  class Param{
-    public:
-      inline int size();
-      inline const std::vector<int>& shape();
-      inline float* mutable_cpu_data();
-      void FromProto(const std::string str);
-      /*void ToProto(singa::BlobProto* blob); 
-      */
-  };
-
-  %template(floatBlob) Blob<float>;
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/generatepy.sh
----------------------------------------------------------------------
diff --git a/tool/python/singa/generatepy.sh b/tool/python/singa/generatepy.sh
deleted file mode 100755
index 488d96a..0000000
--- a/tool/python/singa/generatepy.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-#/**
-# * Licensed to the Apache Software Foundation (ASF) under one
-# * or more contributor license agreements.  See the NOTICE file
-# * distributed with this work for additional information
-# * regarding copyright ownership.  The ASF licenses this file
-# * to you under the Apache License, Version 2.0 (the
-# * "License"); you may not use this file except in compliance
-# * with the License.  You may obtain a copy of the License at
-# *
-# *     http://www.apache.org/licenses/LICENSE-2.0
-# *
-# * Unless required by applicable law or agreed to in writing, software
-# * distributed under the License is distributed on an "AS IS" BASIS,
-# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# * See the License for the specific language governing permissions and
-# * limitations under the License.
-# */
-
-#The following commands are only for developers adding new py apis.
-swig -c++ -python driver.i
-#g++ -fPIC ../../../src/driver.cc driver_wrap.cxx -shared -o _driver.so \
-# 	 -L../../../.libs/ -lsinga -DMSHADOW_USE_CUDA=0 \
-#    -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0 -std=c++11 \
-#    -I../../../include \
-#    -I/usr/include/python2.7/

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/initializations.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/initializations.py b/tool/python/singa/initializations.py
deleted file mode 100644
index f016f1f..0000000
--- a/tool/python/singa/initializations.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-This module pre-defines initial value for fields
-'''
-
-def get_init_values(identifier, **kwargs):
-    '''
-    This method returns field, a set of key-value pairs, that
-    key is specified by identifier and values are initialized.
-    '''
-
-    field = {}
-
-    if identifier == 'none':
-        return
-
-    if identifier == 'uniform':
-        scale = kwargs['scale'] if 'scale' in kwargs else 0.05
-        names = ['low', 'high']
-        values = [-scale, scale]
-
-    elif identifier == 'constant':
-        names = ['value']
-        values = [0]
-
-    elif identifier == 'gaussian':
-        names = ['mean', 'std']
-        values = [0, 0.01]
-
-    elif identifier == 'conv2d':
-        names = ['stride', 'pad']
-        values = [1, 0]
-
-    elif identifier == 'lrn2d':
-        names = ['alpha', 'beta', 'knorm']
-        values = [1, 0.75, 1]
-
-    elif identifier == 'dropout':
-        names = ['ratio']
-        values = [0.5]
-
-    for i in range(len(names)):
-        field[names[i]] = kwargs[names[i]] if names[i] in kwargs else values[i]
-
-    return field

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/layer.py b/tool/python/singa/layer.py
deleted file mode 100644
index c9a992d..0000000
--- a/tool/python/singa/layer.py
+++ /dev/null
@@ -1,693 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-This script includes Layer class and its subclasses that
-users can configure different types of layers for their model.
-'''
-import numpy as np
-from singa.parameter import Parameter, set_param_field
-from singa.initializations import get_init_values
-from singa.utils.utility import setval, generate_name
-from singa.utils.message import *
-from google.protobuf import text_format
-
-from singa.driver import Layer as SingaLayer, Updater as SingaUpdater,\
-                         intVector, floatVector, layerVector,\
-                         paramVector, floatArray_frompointer, DummyLayer
-
-class Layer(object):
-
-    singaupdater = None
-
-    def __init__(self, **kwargs):
-        '''
-        **kwargs (KEY=VALUE)
-          partition_dim = (int)  // partition dimension for net
-        '''
-
-        self.layer = Message('Layer', **kwargs).proto
-        # required field
-        if not 'name' in kwargs:
-            setval(self.layer, name=generate_name('layer', 1))
-
-        # layer connectivity is set in Model.build()
-        self.is_datalayer = False
-        self.singalayer = None
-        self.srclayers = []
-
-        # set src for Rafiki
-        if 'src' in kwargs:
-            self.src = kwargs['src']
-        else:
-            self.src = None
-
-    def setup(self, srclys):
-        ''' Create singa::Layer and store srclayers
-        '''
-        if self.singalayer == None:
-            self.singalayer = SingaLayer.CreateLayer(
-                                    self.layer.SerializeToString())
-            self.singaSrclayerVector = layerVector(len(srclys))
-            for i in range(len(srclys)):
-                self.srclayers.append(srclys[i])
-                self.singaSrclayerVector[i] = srclys[i].get_singalayer()
-            # set up the layer
-            SingaLayer.SetupLayer(self.singalayer,
-                                  self.layer.SerializeToString(),
-                                  self.singaSrclayerVector)
-
-    def ComputeFeature(self, *srclys):
-        ''' The method creates and sets up singa::Layer
-            and maintains its source layers
-            then call ComputeFeature for data transformation.
-
-            *srclys = (list)  // a list of source layers
-        '''
-        # create singa::Layer and store srclayers
-        if self.singalayer == None:
-            if self.src != None:
-                srclys = self.src
-            self.singalayer = SingaLayer.CreateLayer(
-                                    self.layer.SerializeToString())
-            self.singaSrclayerVector = layerVector(len(srclys))
-            for i in range(len(srclys)):
-                self.srclayers.append(srclys[i])
-                self.singaSrclayerVector[i] = srclys[i].get_singalayer()
-            # set up the layer
-            SingaLayer.SetupLayer(self.singalayer,
-                                  self.layer.SerializeToString(),
-                                  self.singaSrclayerVector)
-
-        self.singalayer.ComputeFeature(1, self.singaSrclayerVector)
-
-    def ComputeGradient(self):
-        ''' The method creates singa::Updater
-            and calls ComputeGradient for gradient computation
-            then updates the parameters.
-        '''
-        # call ComputeGradient of Singa
-        self.singalayer.ComputeGradient(1, self.singaSrclayerVector)
-
-    def UpdateParams(self, step, upd):
-        ''' The method updates parameter values
-        '''
-        # update parameters
-        singaParams = self.singalayer.GetParams()
-        for par in singaParams:
-            upd.singaupdater.Update(step, par, 1.0)
-
-    def GetParams(self):
-        ''' The method gets parameter values
-            singaParams[0] for weight
-            singaParams[1] for bias
-        '''
-        singaParams = self.singalayer.GetParams()
-        assert len(singaParams) == 2, 'weight and bias'
-        # for weight
-        weight_array = floatArray_frompointer(singaParams[0].mutable_cpu_data())
-        weight = [weight_array[i] for i in range(singaParams[0].size())]
-        weight = np.array(weight).reshape(singaParams[0].shape())
-        # for bias
-        bias_array = floatArray_frompointer(singaParams[1].mutable_cpu_data())
-        bias = [bias_array[i] for i in range(singaParams[1].size())]
-        bias = np.array(bias).reshape(singaParams[1].shape()[0], 1)
-
-        return weight, bias
-
-    def SetParams(self, *params):
-        ''' The method sets parameter values
-            params[0] for weight
-            params[1] for bias
-        '''
-        singaParams = self.singalayer.GetParams()
-        import pb2.common_pb2 as cm
-        for k in range(len(params)):
-            bp = cm.BlobProto()
-            bp.shape.append(int(params[k].shape[0]))
-            bp.shape.append(int(params[k].shape[1]))
-            for i in range(params[k].shape[0]):
-                for j in range(params[k].shape[1]):
-                    bp.data.append(params[k][i, j])
-            singaParams[k].FromProto(bp.SerializeToString())
-
-    def GetData(self):
-        ''' The method gets layer data values
-        '''
-        blobptr = self.singalayer.data(self.singalayer)
-        data_array = floatArray_frompointer(blobptr.mutable_cpu_data())
-        data = [data_array[i] for i in range(blobptr.count())]
-        return data
-
-    def display(self):
-        debug, flag = False, 0
-        print self.singalayer.ToString(debug, flag)
-
-    def get_singalayer(self):
-        return self.singalayer
-
-
-class Dummy(object):
-
-    def __init__(self, **kwargs):
-        ''' Dummy layer is used for data layer to feed/fetch input data
-            or label information
-        '''
-        self.is_datalayer = True
-        self.srclayers = None
-        self.singalayer = None
-
-        # create layer proto for Dummy layer
-        kwargs = {'name':'dummy', 'type':kDummy}
-        self.layer = Message('Layer', **kwargs).proto
-
-    def setup(self, data_shape):
-        ''' Create and Setup singa Dummy layer
-            called by load_model_parameter
-        '''
-        if self.singalayer == None:
-            setval(self.layer.dummy_conf, input=True)
-            setval(self.layer.dummy_conf, shape=data_shape)
-            self.singalayer = DummyLayer()
-            self.singalayer.Setup(self.layer.SerializeToString(),
-                                  layerVector(0))
-
-    def Feed(self, shape, data, aux_data):
-        ''' Create and Setup singa::DummyLayer for input data
-            Insert data using Feed()
-        '''
-        batchsize = shape[0]
-        hdim = reduce(lambda x, y: x*y, shape[1:])
-        datasize = batchsize * hdim
-
-        # create and setup the dummy layer
-        if self.singalayer == None:
-            self.setup(shape)
-
-        if data is not None:
-            data = data.astype(np.float)
-            dataVector = floatVector(datasize)
-            for i in range(batchsize):
-                for j in range(hdim):
-                    dataVector[i*hdim+j] = data[i, j]
-            labelVector = intVector(0)
-
-        if aux_data is not None:
-            aux_data = aux_data.astype(np.int)
-            labelVector = intVector(datasize)
-            for i in range(batchsize):
-                labelVector[i] = aux_data[i, 0]
-            dataVector = floatVector(0)
-
-        self.singalayer.Feed(batchsize, dataVector, labelVector)
-
-    def get_singalayer(self):
-        return self.singalayer.ToLayer()
-
-class ImageInput(Dummy):
-    ''' This class is used to feed image data
-    '''
-    def __init__(self, width=None, height=None, nb_channel=1):
-        super(ImageInput, self).__init__()
-        self.width = width
-        self.height = height
-        self.nb_channel = nb_channel
-
-    def Feed(self, image_data):
-        batchsize = image_data.shape[0]
-        if self.width == None or self.height == None:
-            hdim = image_data.shape[1]
-            imgsize = int(np.sqrt(hdim/self.nb_channel))
-        shape = [batchsize, self.nb_channel, self.width, self.height]
-        Dummy.Feed(self, shape, image_data, None)
-
-class LabelInput(Dummy):
-    ''' This class is used to feed label data
-    '''
-    def __init__(self):
-        super(LabelInput, self).__init__()
-
-    def Feed(self, label_data):
-        Dummy.Feed(self, label_data.shape, None, label_data)
-
-
-class Data(Layer):
-
-    def __init__(self, load, phase='train', checkpoint=None,
-                 conf=None, **kwargs):
-        '''
-        required
-          load       = (string)  // type of data
-        optional
-          phase      = (string)  // phase of data layer
-          checkpoint = (string)  // checkpoint path
-          conf       = (Store)   // Store object
-          **kwargs (KEY=VALUE)
-            partition_dim = (int)  // partition dimension for net
-        '''
-
-        assert load != None, 'data type should be specified'
-        if load == 'kData':
-            super(Data, self).__init__(name=generate_name('data'),
-                                       user_type=load, **kwargs)
-        else:
-            self.layer_type = enumLayerType(load)
-            super(Data, self).__init__(name=generate_name('data'),
-                                       type=self.layer_type, **kwargs)
-        self.is_datalayer = True
-
-        # include/exclude
-        setval(self.layer, include=enumPhase(phase))
-        #setval(self.layer, exclude=kTest if phase=='train' else kTrain)
-
-        if conf == None:
-            if load == 'kData':
-                setval(self.layer.Extensions[data_conf], **kwargs)
-            else:
-                setval(self.layer.store_conf, **kwargs)
-        else:
-            setval(self.layer, store_conf=conf.proto)
-
-        self.checkpoint = checkpoint # checkpoint for training data
-
-
-class Convolution2D(Layer):
-
-    def __init__(self, nb_filter=0, kernel=0, stride=1, pad=0,
-                 init=None, w_param=None, b_param=None,
-                 activation=None, **kwargs):
-        '''
-        required
-          nb_filter = (int)        // the number of filters
-          kernel    = (int/tuple)  // the size of filter
-        optional
-          stride    = (int/tuple)  // the size of stride
-          pad       = (int/tuple)  // the size of padding
-          init      = (string)     // 'uniform', 'gaussian', 'constant'
-          w_param   = (Parameter)  // Parameter object for weight
-          b_param   = (Parameter)  // Parameter object for bias
-          **kwargs (KEY=VALUE)
-            w_lr = (float) // learning rate multiplier for weight, used to
-                           // scale the learning rate when updating parameters.
-            w_wd = (float) // weight decay multiplier for weight, used to
-                           // scale the weight decay when updating parameters.
-            b_lr = (float) // learning rate multiplier for bias
-            b_wd = (float) // weight decay multiplier for bias
-        '''
-
-        assert nb_filter > 0, 'nb_filter should be set as positive int'
-        super(Convolution2D, self).__init__(name=generate_name('conv', 1),
-                                            type=kCConvolution, **kwargs)
-        fields = {"num_filters":nb_filter}
-        # for kernel
-        if type(kernel) == int:
-            fields['kernel'] = kernel
-        else:
-            fields['kernel_x'] = kernel[0]
-            fields['kernel_y'] = kernel[1]
-        # for stride
-        if type(stride) == int:
-            fields['stride'] = stride
-        else:
-            fields['stride_x'] = stride[0]
-            fields['stride_y'] = stride[1]
-        # for pad
-        if type(pad) == int:
-            fields['pad'] = pad
-        else:
-            fields['pad_x'] = pad[0]
-            fields['pad_y'] = pad[1]
-
-        setval(self.layer.convolution_conf, **fields)
-
-        # parameter w
-        if w_param == None:
-            self.init = 'gaussian' if init == None else init
-            w_param = Parameter(init=self.init)
-        set_param_field(w_param.param, 'w', True, **kwargs)
-        setval(self.layer, param=w_param.param)
-
-        # parameter b
-        if b_param == None:
-            self.init = 'constant' if init == None else init
-            b_param = Parameter(init=self.init) # default: constant
-        set_param_field(b_param.param, 'b', True, **kwargs)
-        setval(self.layer, param=b_param.param)
-
-        # following layers: e.g., activation, dropout, etc.
-        if activation:
-            self.mask = Activation(activation=activation).layer
-
-
-class MaxPooling2D(Layer):
-
-    def __init__(self, pool_size=None,
-                 stride=1, ignore_border=True, **kwargs):
-        '''
-        Max Pooling layer
-
-        required
-          pool_size     = (int|tuple) // the size for pooling
-        optional
-          stride        = (int)       // the size of striding
-          ignore_border = (bool)      // flag for padding
-          **kwargs                    // fields for Layer class
-        '''
-
-        assert pool_size != None, 'pool_size is required'
-        if type(pool_size) == int:
-            pool_size = (pool_size, pool_size)
-        assert type(pool_size) == tuple and pool_size[0] == pool_size[1], \
-               'currently pool size should be square in Singa'
-        super(MaxPooling2D, self).__init__(name=generate_name('pool'),
-                                           type=kCPooling, **kwargs)
-        fields = {'pool' : PoolingProto().MAX,
-                  'kernel' : pool_size[0],
-                  'stride' : stride,
-                  'pad' : 0 if ignore_border else 1}
-        setval(self.layer.pooling_conf, **fields)
-
-class AvgPooling2D(Layer):
-
-    def __init__(self, pool_size=None,
-                 stride=1, ignore_border=True, **kwargs):
-        '''
-        required
-          pool_size     = (int|tuple) // size for pooling
-        optional
-          stride        = (int)       // size of striding
-          ignore_border = (bool)      // flag for padding
-          **kwargs                    // fields for Layer class
-        '''
-
-        assert pool_size != None, 'pool_size is required'
-        if type(pool_size) == int:
-            pool_size = (pool_size, pool_size)
-        assert type(pool_size) == tuple and pool_size[0] == pool_size[1], \
-               'currently pool size should be square in Singa'
-        super(AvgPooling2D, self).__init__(name=generate_name('pool'),
-                                           type=kCPooling, **kwargs)
-        self.layer.pooling_conf.pool = PoolingProto().AVG
-        fields = {'pool' : PoolingProto().AVG,
-                  'kernel' : pool_size[0],
-                  'stride' : stride,
-                  'pad' : 0 if ignore_border else 1}
-        setval(self.layer.pooling_conf, **fields)
-
-class LRN2D(Layer):
-
-    def __init__(self, size=0, **kwargs):
-        '''
-        required
-          size = (int)  // local size
-        '''
-
-        super(LRN2D, self).__init__(name=generate_name('norm'), type=kLRN, **kwargs)
-        # required
-        assert size != 0, 'local size should be set'
-        self.layer.lrn_conf.local_size = size
-        init_values = get_init_values('lrn2d', **kwargs)
-        setval(self.layer.lrn_conf, **init_values)
-
-class Loss(Layer):
-
-    def __init__(self, lossname, topk=1, **kwargs):
-        '''
-        required
-          lossname = (string) // softmaxloss, euclideanloss
-        '''
-        self.layer_type = enumLayerType(lossname)
-        super(Loss, self).__init__(name=generate_name(lossname),
-                                         type=self.layer_type, **kwargs)
-        if lossname == 'softmaxloss':
-            self.layer.softmaxloss_conf.topk = topk
-
-class Activation(Layer):
-
-    def __init__(self, activation='stanh', **kwargs):
-        '''
-        required
-          activation = (string) // relu, sigmoid, tanh, stanh, softmax.
-        '''
-        if activation == 'tanh':
-            print 'Warning: Tanh layer is not supported for CPU'
-
-        self.name = activation
-        self.layer_type = kActivation
-        if activation == 'stanh':
-            self.layer_type = kSTanh
-        elif activation == 'softmax':
-            self.layer_type = kSoftmax
-        super(Activation, self).__init__(name=generate_name(self.name),
-                                         type=self.layer_type, **kwargs)
-        if activation == 'relu':
-            self.layer.activation_conf.type = RELU
-        elif activation == 'sigmoid':
-            self.layer.activation_conf.type = SIGMOID
-        elif activation == 'tanh':
-            self.layer.activation_conf.type = TANH # for GPU
-        #elif activation == 'stanh':
-        #    self.layer.activation_conf.type = STANH
-
-
-class Dropout(Layer):
-
-    def __init__(self, ratio=0.5):
-        '''
-        required
-          ratio = (float) // ratio of drop out nodes
-        '''
-
-        self.name = 'dropout'
-        self.layer_type = enumLayerType(self.name)
-        super(Dropout, self).__init__(name=generate_name(self.name),
-                                      type=self.layer_type, **kwargs)
-        self.layer.dropout_conf.dropout_ratio = ratio
-
-class Accuracy(Layer):
-
-    def __init__(self, **kwargs):
-        '''
-        '''
-
-        self.name = 'accuracy'
-        self.layer_type = enumLayerType(self.name)
-        super(Accuracy, self).__init__(name=generate_name(self.name),
-                                       type=self.layer_type, **kwargs)
-
-class RGB(Layer):
-
-    def __init__(self, meanfile=None, **kwargs):
-        '''
-        required
-          meanfile = (string) // path to meanfile (depreciated)
-        '''
-
-        assert meanfile != None, 'meanfile should be specified'
-        self.name = 'rgb'
-        self.layer_type = kRGBImage
-        super(RGB, self).__init__(name=generate_name(self.name),
-                                  type=self.layer_type)
-        self.layer.rgbimage_conf.meanfile = meanfile
-
-class Dense(Layer):
-
-    def __init__(self, output_dim=0, activation=None,
-                 init=None, w_param=None, b_param=None, input_dim=None,
-                 **kwargs):
-        '''
-        required
-          output_dim = (int)
-        optional
-          activation = (string)
-          init       = (string)     // 'uniform', 'gaussian', 'constant'
-          w_param    = (Parameter)  // Parameter object for weight
-          b_param    = (Parameter)  // Parameter object for bias
-          **kwargs
-            w_lr = (float) // learning rate multiplier for weight, used to
-                           // scale the learning rate when updating parameters.
-            w_wd = (float) // weight decay multiplier for weight, used to
-                           // scale the weight decay when updating parameters.
-            b_lr = (float) // learning rate multiplier for bias
-            b_wd = (float) // weight decay multiplier for bias
-        '''
-        # required
-        assert output_dim > 0, 'output_dim should be set'
-        super(Dense, self).__init__(type=kInnerProduct, **kwargs)
-        self.layer.innerproduct_conf.num_output = output_dim
-        if 'transpose' in kwargs:
-            self.layer.innerproduct_conf.transpose = kwargs['transpose']
-
-        # parameter w (default: gaussian)
-        if w_param == None:
-            self.init = 'gaussian' if init == None else init
-            w_param = Parameter(init=self.init)
-        set_param_field(w_param.param, 'w', False, **kwargs)
-        setval(self.layer, param=w_param.param)
-
-        # parameter b (default: constant)
-        if b_param == None:
-            self.init = 'constant' if init == None else init
-            b_param = Parameter(init=self.init)
-        set_param_field(b_param.param, 'b', False, **kwargs)
-        setval(self.layer, param=b_param.param)
-
-        # following layers: e.g., activation, dropout, etc.
-        if activation:
-            self.mask = Activation(activation=activation).layer
-
-
-''' Classes to deal with multiple layers
-'''
-class Autoencoder(object):
-
-    def __init__(self, hid_dim=None, out_dim=0,
-                 activation=None, param_share=True):
-        '''
-        Generate a set of layers (like MLP) for encoder and decoder
-        The layers are expanded and added in Sequential.add()
-
-        required
-          hid_dim     = (int/list) // the number of nodes in hidden layers
-          out_dim     = (int)      // the number of nodes in the top layer
-        optional
-          activation  = (string)
-          param_share = (bool)     // to share params in encoder and decoder
-        '''
-
-        # required
-        assert out_dim > 0, 'out_dim should be set'
-        self.out_dim = out_dim
-        assert hid_dim != None, 'hid_dim should be set'
-        self.hid_dim = [hid_dim] if type(hid_dim) == int else hid_dim
-
-        self.layer_type = 'AutoEncoder'
-        self.activation = activation
-        self.param_share = param_share
-
-class RBM(Layer):
-
-    def __init__(self, out_dim=None, w_param=None, b_param=None,
-                 sampling=None, **kwargs):
-        '''
-        Generate a set of layers (like MLP) according to the number of elements
-          in out_dim, and on top of it, two layers RBMVis and RBMHid with
-          bidirectional connection
-        The layers are expanded and added in Energy.add()
-
-        required
-          out_dim  = (int) or (int list) // the number of hidden nodes
-        optional
-          w_param  = (Parameter)  // Parameter object for weight
-          b_param  = (Parameter)  // Parameter object for bias
-          sampling = (string)
-        '''
-
-        assert out_dim > 0, 'out_dim should be set'
-        self.out_dim = [out_dim] if type(out_dim) == int else out_dim
-
-        self.name = kwargs['name'] if 'name' in kwargs else 'RBMVis'
-        self.layer_type = kwargs['type'] if 'type' in kwargs else kRBMVis
-        super(RBM, self).__init__(name=generate_name(self.name,
-                                                     withnumber=False),
-                                  type=self.layer_type, **kwargs)
-        setval(self.layer.rbm_conf, hdim=self.out_dim[-1])
-        if self.layer_type == kRBMHid and sampling != None:
-            if sampling == 'gaussian':
-                setval(self.layer.rbm_conf, gaussian=True)
-
-        # parameter w
-        if w_param == None:
-            w_param = Parameter(init='gaussian', **kwargs)
-            set_param_field(w_param.param, 'w', withnumber=False,
-                            level=len(self.out_dim), **kwargs)
-        else:
-            if self.layer_type == kRBMHid:
-                del kwargs['name']
-            else:
-                set_param_field(w_param.param, 'w', withnumber=False,
-        	  	        level=len(self.out_dim), **kwargs)
-        setval(self.layer, param=w_param.param)
-
-        # parameter b
-        if b_param == None:
-            b_param = Parameter(init='constant', **kwargs)
-            set_param_field(b_param.param, 'b', withnumber=False,
-        		    level=len(self.out_dim), **kwargs)
-        else:
-            if self.layer_type == kRBMHid:
-                pass
-            else:
-                set_param_field(b_param.param, 'b', withnumber=False,
-        		        level=len(self.out_dim), **kwargs)
-        setval(self.layer, param=b_param.param)
-
-        if self.layer_type == kRBMVis:
-            wname = w_param.param.name
-            parw = Parameter(name=wname+"_", init='none', share_from=wname)
-            bname = b_param.param.name
-            parb = Parameter(name=bname+"2", wd=0, init='constant')
-            self.bidirect = RBM(self.out_dim, name='RBMHid', type=kRBMHid,
-                         w_param=parw, b_param=parb, sampling=sampling).layer
-
-class Embedding(Layer):
-
-    def __init__(self, in_dim, out_dim, w_param=None, **kwargs):
-
-        super(Embedding, self).__init__(name=generate_name('embedding', 1),
-                                        user_type='kEmbedding')
-        fields = {'vocab_size': in_dim,
-                  'word_dim': out_dim}
-        setval(self.layer.Extensions[embedding_conf], **fields)
-        if w_param == None:
-            # default: uniform
-            w_param = Parameter(name=generate_name('w'), init=init)
-        else:
-            set_param_field(w_param.param, 'w', True, **kwargs)
-        setval(self.layer, param=w_param.param)
-
-class RNNLM(Layer):
-
-    def __init__(self, dim, w_param=None, **kwargs):
-
-        super(RNNLM, self).__init__(name=generate_name('hidden', 1),
-                                    user_type='kHidden')
-        if w_param == None:
-            # default: uniform
-            w_param = Parameter(name=generate_name('w'), init=init)
-        else:
-            set_param_field(w_param.param, 'w', True, **kwargs)
-        setval(self.layer, param=w_param.param)
-
-class UserLossRNNLM(Layer):
-
-    def __init__(self, **kwargs):
-
-        super(UserLossRNNLM, self).__init__(name=generate_name('loss', 1),
-                                            user_type='kLoss')
-        self.layer.Extensions[loss_conf].nclass = kwargs['nclass']
-        self.layer.Extensions[loss_conf].vocab_size = kwargs['vocab_size']
-        setval(self.layer, param=Parameter(name=generate_name('w'),
-                                           init='uniform', scale=0.3).param)
-        setval(self.layer, param=Parameter(name=generate_name('w', 1),
-                                           init='uniform', scale=0.3).param)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/model.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/model.py b/tool/python/singa/model.py
deleted file mode 100644
index 4a6a688..0000000
--- a/tool/python/singa/model.py
+++ /dev/null
@@ -1,716 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-This script includes Model class and its subclasses that
-users can configure model parameter.
-'''
-
-import sys, re, subprocess
-from singa.layer import *
-from singa.utils.utility import *
-from singa.utils.message import *
-from google.protobuf import text_format
-
-from singa.driver import Updater as SingaUpdater
-
-class Model(object):
-    ''' Configure model parameter
-        - add(): add layer
-        - compile(): specify Updater and Cluster protos
-        - build(): construct a model (i.e., NetProto)
-        - fit(): run singa for training
-        - evaluate(): run singa for testing
-    '''
-
-    def __init__(self, name='my model', argv=None, label=False):
-        '''
-        optional
-          name  = (string) // name of model/job
-          argv             // pass sys.argv to source
-          label = (bool)   // exist label layer (depreciated)
-        '''
-        self.jobconf = Message('Job', name=name).proto
-        self.layers = []
-        self.label = label
-        self.argv = argv
-        self.result = None
-        self.last_checkpoint_path = None
-        self.cudnn = False
-        self.accuracy = False
-
-    def add(self, layer):
-        '''
-        add layer
-        '''
-        pass
-
-    def exist_datalayer(self, phase):
-        '''
-        check if data layer exists
-        '''
-        for ly in self.layers:
-            if enumPhase(phase) in ly.layer.include:
-                return True
-        return False
-
-    def compile(self, optimizer=None, cluster=None,
-                      loss=None, topk=1, **kwargs):
-        '''
-        required
-          optimizer = (Updater) // updater settings, e.g., SGD
-          cluster   = (Cluster) // cluster settings
-        optional
-          loss      = (string)  // name of loss function type
-          topk      = (int)     // nb of results considered to compute accuracy
-        '''
-        assert optimizer != None, 'optimizer (Updater component) should be set'
-        assert cluster != None, 'cluster (Cluster component) should be set'
-        setval(self.jobconf, updater=optimizer.proto)
-        setval(self.jobconf, cluster=cluster.proto)
-
-        # take care of loss function layer
-        if loss == None:
-            print 'loss layer is not set'
-        else:
-            if hasattr(self.layers[-1], 'mask'):
-                ly = self.layers[-1].mask
-            else:
-                ly = self.layers[-1].layer
-
-            # take care of the last layer
-            if ly.type == enumLayerType('softmax'):
-                # revise the last layer
-                if loss == 'categorical_crossentropy':
-                    setval(ly, type=enumLayerType('softmaxloss'))
-                    setval(ly.softmaxloss_conf, topk=topk)
-                elif loss == 'mean_squared_error':
-                    setval(ly, type=enumLayerType('euclideanloss'))
-            else:
-                # add new layer
-                if loss == 'categorical_crossentropy':
-                    self.add(Loss('softmaxloss', topk=topk))
-                elif loss == 'mean_squared_error':
-                    self.add(Loss('euclideanloss'))
-                elif loss == 'user_loss_rnnlm': # user-defined loss layer
-                    self.add(UserLossRNNLM(nclass=kwargs['nclass'],
-                                           vocab_size=kwargs['in_dim']))
-
-    def build(self):
-        '''
-        construct neuralnet proto
-        '''
-        net = NetProto()
-        slyname = self.layers[0].layer.name
-        for i in range(len(self.layers)):
-            ly = net.layer.add()
-            ly.CopyFrom(self.layers[i].layer)
-            lastly = ly
-            if self.layers[i].is_datalayer == True:
-                continue
-            getattr(ly, 'srclayers').append(slyname)
-            slyname = ly.name
-            if hasattr(self.layers[i], 'mask'):
-                mly = net.layer.add()
-                mly.CopyFrom(self.layers[i].mask)
-                getattr(mly, 'srclayers').append(slyname)
-                slyname = mly.name
-                lastly = mly
-            if hasattr(self.layers[i], 'bidirect'):
-                bly = net.layer.add()
-                bly.CopyFrom(self.layers[i].bidirect)
-                getattr(bly, 'srclayers').append(slyname)
-
-        # deal with label layer (depreciated)
-        if self.label == True:
-            label_layer = Layer(name='label', type=kLabel)
-            ly = net.layer.add()
-            ly.CopyFrom(label_layer.layer)
-            getattr(ly, 'srclayers').append(self.layers[0].layer.name)
-            getattr(lastly, 'srclayers').append(label_layer.layer.name)
-        else:
-            if lastly.name == 'RBMVis':
-                getattr(lastly, 'srclayers').append(bly.name)
-            else:
-                getattr(lastly, 'srclayers').append(self.layers[0].layer.name)
-
-        if self.accuracy == True:
-            smly = net.layer.add()
-            smly.CopyFrom(Layer(name='softmax', type=kSoftmax).layer)
-            setval(smly, include=kTest)
-            getattr(smly, 'srclayers').append(self.layers[-1].layer.name)
-            aly = net.layer.add()
-            aly.CopyFrom(Accuracy().layer)
-            setval(aly, include=kTest)
-            getattr(aly, 'srclayers').append('softmax')
-            getattr(aly, 'srclayers').append(self.layers[0].layer.name)
-
-        # use of cudnn
-        if self.cudnn == True:
-            self.set_cudnn_layer_type(net)
-
-        setval(self.jobconf, neuralnet=net)
-
-    def fit(self, data=None, alg='bp', nb_epoch=0,
-            with_test=False, execpath='', device=None, **fields):
-        '''
-        required
-          data        = (Data)     // Data class object for training data
-          alg         = (string)   // algorithm, e.g., 'bp', 'cd'
-          nb_epoch    = (int)      // the number of training steps
-        optional
-          with_test   = (bool)     // flag if singa runs for test data
-          execpath    = (string)   // path to user own singa (executable file)
-          device      = (int/list) // a list of gpu ids
-          **fields (KEY=VALUE)
-            batch_size       = (int)    // batch size for training data
-            train_steps      = (int)    // nb of steps for training, i.e., epoch
-            disp_freq        = (int)    // frequency to display training info
-            disp_after       = (int)    // display after this number
-            validate_data    = (Data)   // valid data, specified in load_data()
-            validate_freq    = (int)    // frequency of validation
-            validate_steps   = (int)    // total number of steps for validation
-            validate_after   = (int)    // start validation after this number
-            checkpoint_path  = (string) // path to checkpoint file
-            checkpoint_freq  = (int)    // frequency for checkpoint
-            checkpoint_after = (int)    // start checkpointing after this number
-        '''
-        assert data != None, 'Training data shold be set'
-        assert nb_epoch > 0, 'Training steps shold be set'
-
-        if 'batch_size' in fields:  # if new value is set, replace it
-            setval(data.layer.store_conf, batchsize=fields['batch_size'])
-
-        # insert layer for training
-        if self.exist_datalayer('train') == False:
-            self.layers.insert(0, data)
-        setval(self.jobconf, train_steps=nb_epoch)
-        setval(self.jobconf, disp_freq=nb_epoch/10)
-        if 'disp_freq' in fields:
-            setval(self.jobconf, disp_freq=fields['disp_freq'])
-
-        if 'validate_data' in fields:
-            self.layers.insert(1, fields['validate_data'])
-            setval(self.jobconf, validate_freq=nb_epoch/10)
-
-        setval(self.jobconf, **fields)
-
-        # loading checkpoint if it is set
-        if data.checkpoint != None:
-            setval(self.jobconf, checkpoint_path=data.checkpoint)
-
-        # save model parameter (i.e., checkpoint_path)
-        setval(self.jobconf, checkpoint_freq=nb_epoch)
-        self.last_checkpoint_path = '{0}/step{1}-worker0'.format(
-                         self.jobconf.cluster.workspace, nb_epoch)
-
-        # set Train_one_batch component, using backprogapation at default
-        setval(self.jobconf,
-               train_one_batch=Algorithm(type=enumAlgType(alg)).proto)
-
-        # use of cudnn
-        if device != None:
-            setval(self.jobconf, gpu=device)
-            self.cudnn = True
-
-        # start to run singa for training
-        if with_test == False:
-            self.build()  # construct Nneuralnet Component
-            #self.display()
-            return SingaRun(jobproto=self.jobconf,
-                            argv=self.argv, execpath=execpath)
-        else:
-            # run singa in evaluate() with test data
-            pass
-
-    def evaluate(self, data=None, alg='bp',
-                 checkpoint_path=None, execpath='',
-                 device=None, show_acc=False, **fields):
-        '''
-        required
-          data = (Data)   // Data class object for testing data
-        optional
-          alg             = (string)   // algorithm type, (bp at default)
-          checkpoint_path = (list)     // checkpoint path
-          execpaths       = (string)   // path to user's own executable
-          device          = (int/list) // a list of gpu ids
-          show_acc        = (bool)     // compute and the accuacy
-          **fields (KEY=VALUE)
-            batch_size   = (int)  // batch size for testing data
-            test_freq    = (int)  // frequency of testing
-            test_steps   = (int)  // total number of steps for testing
-            test_after   = (int)  // start testing after this number of steps
-        '''
-        assert data != None, 'Testing data should be set'
-        is_testonly = False
-
-        if 'batch_size' in fields:  # if new value is set, replace it
-            setval(data.layer.store_conf, batchsize=fields['batch_size'])
-
-        # insert layer for testing
-        if self.exist_datalayer('test') == False:
-            self.layers.insert(0, data)
-
-        # loading checkpoint if singa runs only for testing
-        if self.exist_datalayer('train') == False:
-            is_testonly = True
-            if checkpoint_path == None:
-                print 'checkpoint_path has not been specified'
-            else:
-                setval(self.jobconf, checkpoint_path=checkpoint_path)
-
-        steps = fields['test_steps'] if 'test_steps' in fields else 10
-        setval(self.jobconf, test_steps=steps)
-        setval(self.jobconf, **fields)
-
-        # set Train_one_batch component, using backprogapation at default
-        setval(self.jobconf,
-               train_one_batch=Algorithm(type=enumAlgType(alg)).proto)
-
-        # use of cudnn
-        if device != None:
-            setval(self.jobconf, gpu=device)
-            self.cudnn = True
-
-        # set True if showing the accuracy
-        self.accuracy = show_acc
-
-        self.build()  # construct Nneuralnet Component
-
-        #--- generate job.conf file for debug purpose
-        #filename = 'job.conf'
-        #with open(filename, 'w') as f:
-        #  f.write(text_format.MessageToString(self.jobconf.cluster))
-        #self.display()
-
-        #--- run singa ---
-        return SingaRun(jobproto=self.jobconf,
-                        argv=self.argv, execpath=execpath, testmode=is_testonly)
-        #return SingaRun_script(filename=filename, execpath=execpath)
-
-
-    def display(self):
-        ''' print out job proto
-        '''
-        print text_format.MessageToString(self.jobconf)
-
-    def set_cudnn_layer_type(self, net):
-        ''' convert LayerType to CdunnLayerType
-        '''
-        for i in range(len(net.layer)):
-            ly_type = net.layer[i].type
-            cudnn_ly_type = ly_type
-            if ly_type == kCConvolution: cudnn_ly_type = kCudnnConv
-            elif ly_type == kCPooling: cudnn_ly_type = kCudnnPool
-            elif ly_type == kLRN: cudnn_ly_type = kCudnnLRN
-            elif ly_type == kSoftmax: cudnn_ly_type = kCudnnSoftmax
-            elif ly_type == kSoftmaxLoss: cudnn_ly_type = kCudnnSoftmaxLoss
-            elif ly_type == kActivation:
-                cudnn_ly_type = kCudnnActivation
-            elif ly_type == kSTanh:
-                print 'Error report: STanh layer is not supported for GPU'
-            '''
-            elif ly_type == kReLU:
-                cudnn_ly_type = kCudnnActivation
-                net.layer[i].activation_conf.type = RELU
-            elif ly_type == kSigmoid:
-                cudnn_ly_type = kCudnnActivation
-                net.layer[i].activation_conf.type = SIGMOID
-            elif ly_type == kTanh:
-                cudnn_ly_type = kCudnnActivation
-                net.layer[i].activation_conf.type = TANH
-            '''
-            #elif ly_type == kSTanh:
-            #    print 'Error report: STanh layer is not supported for GPU'
-                #cudnn_ly_type = kCudnnActivation
-                #net.layer[i].activation_conf.type = STANH
-            net.layer[i].type = cudnn_ly_type
-
-    def show(self):
-        for ly in self.jobconf.neuralnet.layer:
-            print layer(ly.name)
-
-    def layer_by_id(self, k):
-        return self.jobconf.neuralnet.layer[k]
-
-    def layer_by_name(self, name):
-        return self.layers[k]
-
-    def size(self):
-        return len(self.jobconf.neuralnet.layer)
-
-class Energy(Model):
-    ''' energy model
-    '''
-
-    def __init__(self, name='my model', argv=[], label=False):
-        super(Energy, self).__init__(name=name, argv=argv, label=label)
-
-    def add(self, layer):
-        if hasattr(layer, 'layer_type'):
-            if layer.layer_type == kRBMVis:
-                dim = 0
-                for i in range(1, len(layer.out_dim)):
-                    parw = Parameter(name='w', init='none', level=i)
-                    parb = Parameter(name='b', init='none', level=i)
-                    dim = layer.out_dim[i-1]
-                    self.layers.append(Dense(dim, w_param=parw, b_param=parb,
-                                             activation='sigmoid'))
-                self.layers.append(layer)
-
-class Sequential(Model):
-    ''' sequential model
-    '''
-
-    def __init__(self, name='my model', argv=[], label=False):
-        super(Sequential, self).__init__(name=name, argv=argv, label=label)
-
-    def add(self, layer):
-        if hasattr(layer, 'layer_type'):
-            if layer.layer_type == 'AutoEncoder':
-                dim = 0
-                if layer.param_share == True:
-                    # Encoding
-                    for i in range(1, len(layer.hid_dim)+1):
-                        parw = Parameter(name='w',
-                                         init='none', level=i)
-                        parb = Parameter(name='b',
-                                         init='none', level=i)
-                        dim = layer.hid_dim[i-1]
-                        if i == len(layer.hid_dim): activation = None
-                        else: activation = layer.activation
-                        self.layers.append(Dense(dim,
-                                                 w_param=parw, b_param=parb,
-                                                 activation=activation))
-                    # Decoding
-                    for i in range(len(layer.hid_dim), 0, -1):
-                        parw = Parameter(name=generate_name('w', 2),
-                                         init='none')
-                        parb = Parameter(name=generate_name('b', 2),
-                                         init='none')
-                        setval(parw.param, share_from='w'+str(i))
-                        setval(parb.param, name='b'+str(i))
-                        if i == 1: dim = layer.out_dim
-                        else: dim = layer.hid_dim[i-2]
-                        self.layers.append(Dense(dim,
-                                                 w_param=parw, b_param=parb,
-                                                 activation=layer.activation,
-                                                 transpose=True))
-                else:
-                    # MLP
-                    for i in range(1, len(layer.hid_dim)+2):
-                        parw = Parameter(name='w',
-                                         init='none', level=i)
-                        parb = Parameter(name='b',
-                                         init='none', level=i)
-                        if i == len(layer.hid_dim)+1: dim = layer.out_dim
-                        else: dim = layer.hid_dim[i-1]
-                        self.layers.append(Dense(dim,
-                                                 w_param=parw, b_param=parb,
-                                                 activation=layer.activation))
-            else:
-                self.layers.append(layer)
-        else:
-            self.layers.append(layer)
-
-
-class Store(object):
-
-    def __init__(self, **kwargs):
-        '''
-        **kwargs
-            path       = (string)  // path to dataset
-            backend    = (string)  //
-            batch_size = (int)     // batch size of dataset
-            shape      = (int)     //
-        '''
-        self.proto = Message('Store', **kwargs).proto
-
-class Algorithm(object):
-
-    def __init__(self, type=enumAlgType('bp'), **kwargs):
-        '''
-        type = (string)  // type of algorithm, bp at default
-        '''
-        alg = Message('Alg', alg=type, **kwargs).proto
-        if type == enumAlgType('cd'):
-            setval(alg.cd_conf, **kwargs)
-        self.proto = alg
-
-class Updater(object):
-
-    def __init__(self, upd_type, lr, lr_type,
-                 decay, momentum,
-                 step, step_lr, **fields):
-        '''
-        required
-          upd_type = (enum)   // enum type of updater
-          lr       = (float)  // base learning rate
-        optional
-          lr_type  = (string) // type of the learning rate (Fixed at default)
-        '''
-        upd = Message('Updater', type=upd_type, **fields).proto
-        setval(upd.learning_rate, base_lr=lr)
-        if decay > 0:
-            setval(upd, weight_decay=decay)
-        if momentum > 0:
-            setval(upd, momentum=momentum)
-
-        if lr_type == None or lr_type == "fixed":
-            setval(upd.learning_rate, type=kFixed)
-        elif lr_type == 'step':
-            cp = Message('Step', change_freq=60, gamma=0.997)
-            setval(upd.learning_rate, type=kStep, step_conf=cp.proto)
-        elif lr_type == 'manual':
-            cp = Message('FixedStep', step=step, step_lr=step_lr)
-            setval(upd.learning_rate, type=kFixedStep, fixedstep_conf=cp.proto)
-        elif lr_type == 'linear':
-            cp = Message('Linear', change_freq=10, final_lr=0.1)
-            setval(upd.learning_rate, type=kLinear, linear_conf=cp.proto)
-
-        self.proto = upd
-        self.singaupdater = None
-
-    def Update(self, step, layer):
-        ''' This method updates parameters of layer
-            step = (int)  // training step, i.e., param version
-        '''
-        if self.singaupdater == None:
-            self.singaupdater = SingaUpdater.CreateUpdater(
-                                  self.proto.SerializeToString())
-
-        # update parameters
-        singaParams = layer.singalayer.GetParams()
-        for par in singaParams:
-            self.singaupdater.Update(step, par, 1.0)
-    
-
-class SGD(Updater):
-
-    def __init__(self, lr=0.01, lr_type=None,
-                 decay=0, momentum=0,
-                 step=(0), step_lr=(0.01), **fields):
-        '''
-        required
-           lr       = (float)      // base learning rate
-        optional
-           lr_type  = (string)     // type of learning rate, 'Fixed' at default
-           decay    = (float)      // weight decay
-           momentum = (float)      // momentum
-           step     = (int/list)   // steps
-           step_lr  = (float/list) // learning rate after the steps
-           **fields (KEY=VALUE)
-        '''
-        assert lr
-        super(SGD, self).__init__(upd_type=kSGD,
-                                  lr=lr, lr_type=lr_type,
-                                  decay=decay, momentum=momentum,
-                                  step=step, step_lr=step_lr, **fields)
-
-class AdaGrad(Updater):
-
-    def __init__(self, lr=0.01, lr_type=None,
-                 decay=0, momentum=0,
-                 step=(0), step_lr=(0.01), **fields):
-        '''
-        required
-           lr       = (float)      // base learning rate
-        optional
-           lr_type  = (string)     // type of learning rate, 'Fixed' at default
-           decay    = (float)      // weight decay
-           momentum = (float)      // momentum
-           step     = (int/list)   // steps
-           step_lr  = (float/list) // learning rate after the steps
-           **fields (KEY=VALUE)
-        '''
-        assert lr
-        super(AdaGrad, self).__init__(upd_type=kAdaGrad,
-                                  lr=lr, lr_type=lr_type,
-                                  decay=decay, momentum=momentum,
-                                  step=step, step_lr=step_lr, **fields)
-
-class Cluster(object):
-    """ Specify the cluster topology, e.g., number of workers/servers.
-
-    Currently we need to create this object in the .py file and also provide a
-    cluster configuration file to the command line. TODO(wangwei) update SINGA
-    code to eliminate the requirement of the cluster configuration file for
-    training on a single node or the cluster object in the pyfile for training
-    in a cluster.
-    """
-
-    def __init__(self, workspace=None,
-                 nworker_groups=1, nserver_groups=1,
-                 nworkers_per_group=1, nservers_per_group=1,
-                 nworkers_per_procs=1, nservers_per_procs=1,
-                 **fields):
-        '''
-        required
-          workspace = (string) // workspace path
-        optional
-          nworker_groups     = (int)
-          nserver_groups     = (int)
-          nworkers_per_group = (int)
-          nservers_per_group = (int)
-          nworkers_per_procs = (int)
-          nservers_per_procs = (int)
-          **fields
-            server_worker_separate = (bool)
-        '''
-        assert workspace != None, 'need to set workspace'
-        self.proto = Message('Cluster', workspace=workspace).proto
-        # optional
-        self.proto.nworker_groups = nworker_groups
-        self.proto.nserver_groups = nserver_groups
-        self.proto.nworkers_per_group = nworkers_per_group
-        self.proto.nservers_per_group = nservers_per_group
-        self.proto.nworkers_per_procs = nworkers_per_procs
-        self.proto.nservers_per_procs = nservers_per_procs
-        # other fields
-        setval(self.proto, **fields)
-
-
-def StoreResults(lines):
-    """ Parsing metrics from each line in the log file.
-
-    TODO(wangwei) format the log string to make them uniform for easy parsing
-    Another approach is creating a protobuf message for metrics, which can be
-    used for dumping metrics to string and loading perf string back to messages.
-    """
-
-    resultDic = {}
-    for line in lines:
-        line = re.findall(r'[\w|*.*]+', line)
-        if 'Train' in line:
-            step = line[line.index('step')+1]
-            if 'accuracy' in line:
-                resultDic.setdefault(step, {})['acc'] \
-                                             = line[line.index('accuracy')+1]
-            if 'loss' in line:
-                resultDic.setdefault(step, {})['loss'] \
-                                             = line[line.index('loss')+1]
-            if 'ppl' in line:
-                resultDic.setdefault(step, {})['ppl'] \
-                                             = line[line.index('ppl')+1]
-            if 'Squared' in line:
-                resultDic.setdefault(step, {})['se'] \
-                                             = line[line.index('Squared')+2]
-    return resultDic
-
-def SingaRun(jobproto='', argv=None, execpath='', testmode=False):
-    """
-    Run Singa and receive the training/test results.
-    """
-
-    import singa.driver as driver
-    d = driver.Driver()
-    d.InitLog(argv[0])
-    d.Init(argv)
-    if testmode == True:
-        d.Test(jobproto.SerializeToString())
-    else:
-        d.Train(False, jobproto.SerializeToString())
-
-    # Get the performance from the latest log file.
-    # TODO(wangwei) the log file would be overwritten by other running instance
-    # of the same program, e.g., lt-singa
-    logfile = '/tmp/singa-log/{0}.ERROR'.format(argv[0].split('/')[-1])
-    fin = open(logfile, 'r')
-    result = StoreResults(fin.readlines())
-
-    return result
-
-def SingaRun_script(filename='', execpath=''):
-    """
-    Deprecated.
-    Generate the job conf file and run the shell command.
-    """
-    SINGAROOT = '../../../'
-    conf = 'examples/' + filename
-    if execpath == '':
-        cmd = SINGAROOT+'bin/singa-run.sh ' \
-            + '-conf %s ' % conf
-    else:
-        cmd = SINGAROOT+'bin/singa-run.sh ' \
-            + '-conf %s ' % conf \
-            + '-exec %s ' % execpath
-
-    procs = subprocess.Popen(cmd.strip().split(' '),
-                             stdout=subprocess.PIPE,
-                             stderr=subprocess.STDOUT)
-
-    resultDic = {}
-    outputlines = iter(procs.stdout.readline, '')
-    resultDic = StoreResults(outputlines)
-
-    #TODO better format to store the result??
-    return resultDic
-
-def load_model_parameter(fin, neuralnet, batchsize=1, data_shape=None):
-    """
-    this method loads model parameter
-    """
-    hly_idx = 0
-    for i in range(len(neuralnet)): 
-        if neuralnet[i].is_datalayer:
-            if data_shape == None:
-                shape = neuralnet[i].shape
-                shape[0] = batchsize
-                neuralnet[i].setup(shape)
-            else:
-                neuralnet[i].setup(data_shape)
-        else:
-            hly_idx = i
-            break
-
-    net = layerVector(len(neuralnet)-hly_idx)
-    for i in range(hly_idx, len(neuralnet)): 
-        if neuralnet[i].src==None:
-            neuralnet[i].setup(neuralnet[i-1])
-        else:
-            neuralnet[i].setup(neuralnet[i].src)
-        net[i-hly_idx] = neuralnet[i].singalayer
-
-    from singa.driver import Worker
-    alg = Algorithm(type=enumAlgType('bp')).proto
-    w = Worker.CreateWorker(alg.SerializeToString())
-    w.InitNetParams(fin, net)
-
-def save_model_parameter(step, fout, neuralnet):
-    """
-    this method saves model parameter
-    """
-    hly_idx = 0
-    for i in range(len(neuralnet)): 
-        if not neuralnet[i].is_datalayer:
-            hly_idx = i
-            break
-
-    from singa.driver import Worker
-    net = layerVector(len(neuralnet)-hly_idx)
-    for i in range(hly_idx, len(neuralnet)): 
-        net[i-hly_idx] = neuralnet[i].singalayer
-    alg = Algorithm(type=enumAlgType('bp')).proto
-    w = Worker.CreateWorker(alg.SerializeToString())
-    w.Checkpoint(step, fout, net)
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/parameter.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/parameter.py b/tool/python/singa/parameter.py
deleted file mode 100644
index 14ad852..0000000
--- a/tool/python/singa/parameter.py
+++ /dev/null
@@ -1,140 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-'''
-This script includes Parameter class and a method, named set_param_field
-that users can configure Param and ParamGen protos.
-'''
-
-from singa.initializations import get_init_values
-from singa.utils.utility import setval, generate_name
-from singa.utils.message import *
-from google.protobuf import text_format
-
-
-class Parameter(object):
-
-    def __init__(self, **kwargs):
-        '''
-	optional
-	  **kwargs
-	    name  = (string) // parameter name
-	    lr    = (float)  // learning rate multiplier
-	    wd    = (float)  // weight decay multiplier
-	    init  = (string) // init type {'constant','uniform','gaussian'}
-	    value = (int)    // value for 'constant'
-	    scale = (float)  // [low=-scale, high=scale] for 'uniform'
-	    low   = (float)  // low value   for 'uniform'
-	    high  = (float)  // high value  for 'uniform'
-	    mean  = (float)  // mean for 'gaussian'
-	    std   = (float)  // std  for 'gaussian'
-	'''
-        fields = {'lr_scale' : kwargs['lr'] if 'lr' in kwargs else 1,
-                  'wd_scale' : kwargs['wd'] if 'wd' in kwargs else 1
-                 }
-        self.param = Message('Param', **fields).proto
-
-        if not 'name' in kwargs:
-            setval(self.param, name=generate_name('param', 1))
-        else:
-            pname = kwargs['name']
-            # parameter name for RBM
-            if 'level' in kwargs:
-                pname += str(kwargs['level'])
-                if pname[0] == 'b':
-                    pname += '2'
-            setval(self.param, name=pname)
-
-        if 'share_from' in kwargs:
-            setval(self.param, share_from=kwargs['share_from'])
-
-        if 'init' in kwargs:
-            init_values = get_init_values(kwargs['init'], **kwargs)
-            if not kwargs['init'] == 'none':
-                pgen = Message('ParamGen', type=enumInitMethod(kwargs['init']),
-                               **init_values)
-                del kwargs['init']
-                setval(self.param, init=pgen.proto)
-        else: # default: uniform
-            pgen = Message('ParamGen', type=enumInitMethod('uniform'))
-            setval(self.param, init=pgen.proto)
-
-    def update(self, **fields):
-        setval(self.param, **fields)
-        setval(self.param.init, **fields)
-
-
-def set_param_field(param, pname, changename=False, withnumber=True, **kwargs):
-    '''
-      param      = (ParamProto)
-      pname      = (string)     // 'w' for wiehgt, or 'b' for bias
-      changename = (bool)       // update parameter name if True
-      withnumber = (bool)       // add layer number if True
-      **kwargs
-        w_lr = (float) // learning rate multiplier for weight, used to
-                       // scale the learning rate when updating parameters.
-        w_wd = (float) // weight decay multiplier for weight, used to
-                       // scale the weight decay when updating parameters.
-        b_lr = (float) // learning rate multiplier for bias 
-        b_wd = (float) // weight decay multiplier for bias
-    '''
-    assert pname == 'w' or pname == 'b', 'pname should be w or b'
-
-    lr_ = param.lr_scale
-    wd_ = param.wd_scale
-    initkv = {}
-
-    if pname == 'w':
-        if 'w_lr' in kwargs:
-            lr_ = kwargs['w_lr']
-            del kwargs['w_lr']
-        if 'w_wd' in kwargs:
-            wd_ = kwargs['w_wd']
-            del kwargs['w_wd']
-        for key, val in kwargs.items():
-            if key.startswith('w_'):
-                initkv[key[2:]] = val
-
-    elif pname == 'b':
-        if 'b_lr' in kwargs:
-            lr_ = kwargs['b_lr']
-            del kwargs['b_lr']
-        if 'b_wd' in kwargs:
-            wd_ = kwargs['b_wd']
-            del kwargs['b_wd']
-        for key, val in kwargs.items():
-            if key.startswith('b_'):
-                initkv[key[2:]] = val
-
-    field = {'lr_scale' : lr_, 'wd_scale' : wd_}
-
-    # Set/update parameter fields
-    if param.name.startswith('param') or changename == True:
-        if 'level' in kwargs:  # parameter name for RBM
-            pname += str(kwargs['level'])
-        setval(param, name=generate_name(pname, withnumber=withnumber), **field)
-    else:
-        setval(param, **field)
-
-    # Set/update parameter init fields
-    setval(param.init, **initkv)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/utils/__init__.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/utils/__init__.py b/tool/python/singa/utils/__init__.py
deleted file mode 100644
index a796a7a..0000000
--- a/tool/python/singa/utils/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed9587c0/tool/python/singa/utils/message.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/utils/message.py b/tool/python/singa/utils/message.py
deleted file mode 100644
index bfa9ef2..0000000
--- a/tool/python/singa/utils/message.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-
-#/************************************************************
-#*
-#* Licensed to the Apache Software Foundation (ASF) under one
-#* or more contributor license agreements.  See the NOTICE file
-#* distributed with this work for additional information
-#* regarding copyright ownership.  The ASF licenses this file
-#* to you under the Apache License, Version 2.0 (the
-#* "License"); you may not use this file except in compliance
-#* with the License.  You may obtain a copy of the License at
-#*
-#*   http://www.apache.org/licenses/LICENSE-2.0
-#*
-#* Unless required by applicable law or agreed to in writing,
-#* software distributed under the License is distributed on an
-#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#* KIND, either express or implied.  See the License for the
-#* specific language governing permissions and limitations
-#* under the License.
-#*
-#*************************************************************/
-
-import sys, os
-from utility import *
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../pb2'))
-
-'''
-This script reads proto files in ../../pb2, generated by proto buffer compiler.
- - Message class creates an object for proto and sets initial vlaues for
-   the fields, specified by kwargs
- - make_function method generates a method named enumInitMethod that returns
-   enum values of given enum type, defined in the proto files
-'''
-
-MODULE_LIST = []
-
-# import all modules in dir singa_root/tool/python/pb2
-# except common, singa, and __init__
-for f in os.listdir(os.path.join(os.path.dirname(__file__), '../../pb2')):
-    if (f.endswith(".pyc")):
-        continue
-    if(f == "__init__.py" or f == "common_pb2.py" or f == "singa_pb2.py"):
-        continue
-    module_name = f.split('.')[0]
-    module_obj = __import__(module_name)
-    MODULE_LIST.append(module_obj)
-    for func_name in dir(module_obj):
-        if not func_name.startswith("__"):
-            globals()[func_name] = getattr(module_obj, func_name)
-
-class Message(object):
-    def __init__(self, protoname, **kwargs):
-        for module_obj in MODULE_LIST:
-            if hasattr(module_obj, protoname+"Proto"):
-                class_ = getattr(module_obj, protoname+"Proto")
-                self.proto = class_()
-                return setval(self.proto, **kwargs)
-        raise Exception('invalid protoname')
-
-enumDict_ = dict()
-
-#get all enum type list in the modules
-for module_obj in MODULE_LIST:
-    for enumtype in module_obj.DESCRIPTOR.enum_types_by_name:
-        tempDict = enumDict_[enumtype] = dict()
-        for name in getattr(module_obj, enumtype).DESCRIPTOR.values_by_name:
-            tempDict[name[1:].lower()] = getattr(module_obj, name)
-
-def make_function(enumtype):
-    def _function(key):
-        return enumDict_[enumtype][key]
-    return _function
-
-current_module = sys.modules[__name__]
-
-#def all the enumtypes
-for module_obj in MODULE_LIST:
-    for enumtype in module_obj.DESCRIPTOR.enum_types_by_name:
-        setattr(current_module, "enum"+enumtype, make_function(enumtype))

[02/51] [abbrv] incubator-singa git commit: SINGA-235 - Unify the engines for cudnn and singa layers

Posted by wa...@apache.org.

SINGA-235 - Unify the engines for cudnn and singa layers

For most layers, we would have multiple implementations, e.g., using
cudnn for nvidia gpu, using cpp for cpu and using opencl for other gpus.

These layers have different classes. They are registered with different
identifiers. This ticket would unify the layer identifiers for each
engine:
1. cudnn layers are registered with identifier = cudnn_xxx, e.g.,
cudnn_convolution for the CudnnConvolution layer.
2. singa layers are registered with identifier = singa_xxx, e.g.,
singa_convolution for the Convolution layer.

cudnn engine must run on cuda devices. and singa engine could run on
cuda-gpu device or cpp-cpu device depending on the layer type. For
instance, the Convolution layer must run on cpp-cpu device, and Dense
layer can run on both devices and would select the correct device
automatically.
Users need to make sure the engine and the device of the tensors.

Both CPP and Python code is updated. Users have to compose the layer
identifier manually for CPP version. For Python version, users can set
layer.engine='cudnn' or 'singa'.

All identifiers are case insensitive.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/05720c21
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/05720c21
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/05720c21

Branch: refs/heads/master
Commit: 05720c21636c0fd55770206176a60a9ab20ae16c
Parents: 53639b7
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Aug 10 21:05:22 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 11 10:52:01 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/README.md            |  17 +--
 examples/char-rnn/train.py             |   2 +-
 examples/cifar10/alexnet-parallel.cc   |  84 ++++-------
 examples/cifar10/alexnet.cc            |  58 ++++---
 examples/cifar10/alexnet.py            |   5 +-
 examples/cifar10/train.py              |  33 ++--
 examples/cifar10/vgg-parallel.cc       |  91 ++++++-----
 examples/cifar10/vgg.py                |  24 +--
 examples/imagenet/alexnet.cc           |  22 +--
 include/singa/core/device.h            |  30 ++--
 include/singa/model/feed_forward_net.h |  12 +-
 include/singa/model/layer.h            |   4 +-
 include/singa/utils/integer.h          |  73 +++++++++
 src/core/device/platform.cc            |   4 +-
 src/core/tensor/tensor.cc              |   4 +-
 src/core/tensor/tensor_math_cpp.h      |   2 +-
 src/model/feed_forward_net.cc          |  17 +--
 src/model/layer/activation.cc          |  27 ++--
 src/model/layer/activation.h           |   2 +-
 src/model/layer/batchnorm.cc           |   6 +-
 src/model/layer/batchnorm.h            |   2 +-
 src/model/layer/convolution.cc         |   2 +-
 src/model/layer/convolution.h          |   2 +-
 src/model/layer/cudnn_activation.cc    |  10 +-
 src/model/layer/cudnn_activation.h     |   2 +-
 src/model/layer/cudnn_batchnorm.cc     |   2 +-
 src/model/layer/cudnn_batchnorm.h      |   2 +-
 src/model/layer/cudnn_convolution.cc   |   2 +-
 src/model/layer/cudnn_convolution.h    |   2 +-
 src/model/layer/cudnn_dropout.cc       |   2 +-
 src/model/layer/cudnn_dropout.h        |   2 +-
 src/model/layer/cudnn_lrn.cc           |   2 +-
 src/model/layer/cudnn_lrn.h            |   2 +-
 src/model/layer/cudnn_pooling.cc       |   2 +-
 src/model/layer/cudnn_pooling.h        |   2 +-
 src/model/layer/cudnn_rnn.cc           |  17 +--
 src/model/layer/cudnn_rnn.h            |   2 +-
 src/model/layer/cudnn_softmax.cc       |   2 +-
 src/model/layer/cudnn_softmax.h        |   2 +-
 src/model/layer/cudnn_utils.h          |   2 +-
 src/model/layer/dense.cc               |   2 +-
 src/model/layer/dense.h                |   2 +-
 src/model/layer/dropout.cc             |   2 +-
 src/model/layer/dropout.h              |   2 +-
 src/model/layer/flatten.cc             |   2 +-
 src/model/layer/flatten.h              |   2 +-
 src/model/layer/lrn.cc                 |   2 +-
 src/model/layer/lrn.h                  |   4 +-
 src/model/layer/pooling.cc             |   2 +-
 src/model/layer/pooling.h              |   2 +-
 src/model/layer/prelu.cc               |   4 +-
 src/model/layer/prelu.h                |   2 +-
 src/model/layer/rnn.cc                 |   2 +-
 src/model/layer/rnn.h                  |   2 +-
 src/model/layer/softmax.cc             |   2 +-
 src/model/layer/softmax.h              |   2 +-
 src/python/singa/device.py             |  13 ++
 src/python/singa/layer.py              | 226 ++++++++++++++++++----------
 src/python/singa/net.py                |   6 +-
 src/python/singa/tensor.py             |   7 +-
 src/python/swig/core_device.i          |   4 +
 src/python/swig/model_layer.i          |   3 -
 test/singa/test_activation.cc          |  26 ++--
 test/singa/test_batchnorm.cc           |  26 ++--
 test/singa/test_convolution.cc         |   2 +-
 test/singa/test_cudnn_activation.cc    |  28 ++--
 test/singa/test_cudnn_batchnorm.cc     |   2 +-
 test/singa/test_cudnn_convolution.cc   |   4 +-
 test/singa/test_cudnn_dropout.cc       |   2 +-
 test/singa/test_cudnn_lrn.cc           |   2 +-
 test/singa/test_cudnn_pooling.cc       |   2 +-
 test/singa/test_cudnn_rnn.cc           |   2 +-
 test/singa/test_cudnn_softmax.cc       |   2 +-
 test/singa/test_dense.cc               |   2 +-
 test/singa/test_dropout.cc             |   2 +-
 test/singa/test_flatten.cc             |   2 +-
 test/singa/test_layer.cc               |  19 ++-
 test/singa/test_lrn.cc                 |   2 +-
 test/singa/test_pooling.cc             |   2 +-
 test/singa/test_prelu.cc               |   2 +-
 test/singa/test_softmax.cc             |   2 +-
 81 files changed, 573 insertions(+), 433 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/char-rnn/README.md
----------------------------------------------------------------------
diff --git a/examples/char-rnn/README.md b/examples/char-rnn/README.md
index c5cdbb8..d4cfa30 100644
--- a/examples/char-rnn/README.md
+++ b/examples/char-rnn/README.md
@@ -1,10 +1,10 @@
 # Train Char-RNN using SINGA
 
 Recurrent neural networks (RNN) are widely used for modelling sequential data,
-e.g., natural language sentences. This example describe how to implement a RNN
+e.g., natural language sentences. This example describes how to implement a RNN
 application (or model) using SINGA's RNN layers.
-We will use the [char-rnn](https://github.com/karpathy/char-rnn) modle as an
-example, which trains over setences or
+We will use the [char-rnn](https://github.com/karpathy/char-rnn) model as an
+example, which trains over sentences or
 source code, with each character as an input unit. Particularly, we will train
 a RNN using GRU over Linux kernel source code. After training, we expect to
 generate meaningful code from the model.
@@ -12,20 +12,19 @@ generate meaningful code from the model.
 
 ## Instructions
 
-* Compile and install SINGA. Currently the RNN implmentation depends on Cudnn V5.
+* Compile and install SINGA. Currently the RNN implementation depends on Cudnn with version >= 5.05.
 
 * Prepare the dataset. Download the [kernel source code](http://cs.stanford.edu/people/karpathy/char-rnn/).
 Other plain text files can also be used.
 
 * Start the training,
 
-    python train.py input_linux.txt
+        python train.py input_linux.txt
 
   Some hyper-parameters could be set through command line,
 
-    python train.py -h
+        python train.py -h
 
+* Sample characters from the model by providing the number of characters to sample and the seed string.
 
-* Sample characters from the model by providing num of characters and the seed string.
-
-    python sample.py 100 --seed '#include <std'
+        python sample.py 100 --seed '#include <std'

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 22fdc82..3dfa0d9 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -195,7 +195,7 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Train multi-stack LSTM for '\
             'modeling  character sequence from plain text files')
-    parser.add_argument('data', type=string, help='training file')
+    parser.add_argument('data', type=str, help='training file')
     parser.add_argument('-b', type=int, default=32, help='batch_size')
     parser.add_argument('-l', type=int, default=64, help='sequence length')
     parser.add_argument('-d', type=int, default=128, help='hidden size')

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/cifar10/alexnet-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet-parallel.cc b/examples/cifar10/alexnet-parallel.cc
index 15ef58e..8cc3352 100644
--- a/examples/cifar10/alexnet-parallel.cc
+++ b/examples/cifar10/alexnet-parallel.cc
@@ -28,21 +28,17 @@
 #include "singa/utils/channel.h"
 #include "singa/utils/string.h"
 #include "singa/core/memory.h"
-#include "../../src/model/layer/cudnn_convolution.h"
-#include "../../src/model/layer/cudnn_activation.h"
-#include "../../src/model/layer/cudnn_pooling.h"
-#include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/dense.h"
-#include "../../src/model/layer/flatten.h"
 #include <thread>
 #include <memory>
+
 namespace singa {
+const std::string engine = "cudnn";
 
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnConvolution");
+  conf.set_type(engine + "_convolution");
   ConvolutionConf *conv = conf.mutable_convolution_conf();
   conv->set_num_output(nb_filter);
   conv->add_kernel_size(kernel);
@@ -67,7 +63,7 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
                          int pad) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnPooling");
+  conf.set_type(engine + "_pooling");
   PoolingConf *pool = conf.mutable_pooling_conf();
   pool->set_kernel_size(kernel);
   pool->set_stride(stride);
@@ -79,14 +75,14 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
 LayerConf GenReLUConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("RELU");
+  conf.set_type(engine + "_relu");
   return conf;
 }
 
 LayerConf GenDenseConf(string name, int num_output, float std, float wd) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Dense");
+  conf.set_type("singa_dense");
   DenseConf *dense = conf.mutable_dense_conf();
   dense->set_num_output(num_output);
 
@@ -108,7 +104,7 @@ LayerConf GenDenseConf(string name, int num_output, float std, float wd) {
 LayerConf GenLRNConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnLRN");
+  conf.set_type(engine + "_lrn");
   LRNConf *lrn = conf.mutable_lrn_conf();
   lrn->set_local_size(3);
   lrn->set_alpha(5e-05);
@@ -119,7 +115,7 @@ LayerConf GenLRNConf(string name) {
 LayerConf GenFlattenConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Flatten");
+  conf.set_type("singa_flatten");
   return conf;
 }
 
@@ -127,20 +123,19 @@ FeedForwardNet CreateNet() {
   FeedForwardNet net;
   Shape s{3, 32, 32};
 
-  net.Add(new CudnnConvolution(), GenConvConf("conv1", 32, 5, 1, 2, 0.0001),
-          &s);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 3, 2, 1));
-  net.Add(new CudnnActivation(), GenReLUConf("relu1"));
-  net.Add(new CudnnLRN(), GenLRNConf("lrn1"));
-  net.Add(new CudnnConvolution(), GenConvConf("conv2", 32, 5, 1, 2, 0.01));
-  net.Add(new CudnnActivation(), GenReLUConf("relu2"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool2", false, 3, 2, 1));
-  net.Add(new CudnnLRN(), GenLRNConf("lrn2"));
-  net.Add(new CudnnConvolution, GenConvConf("conv3", 64, 5, 1, 2, 0.01));
-  net.Add(new CudnnActivation(), GenReLUConf("relu3"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool3", false, 3, 2, 1));
-  net.Add(new Flatten(), GenFlattenConf("flat"));
-  net.Add(new Dense(), GenDenseConf("ip", 10, 0.01, 250));
+  net.Add(GenConvConf("conv1", 32, 5, 1, 2, 0.0001), &s);
+  net.Add(GenPoolingConf("pool1", true, 3, 2, 1));
+  net.Add(GenReLUConf("relu1"));
+  net.Add(GenLRNConf("lrn1"));
+  net.Add(GenConvConf("conv2", 32, 5, 1, 2, 0.01));
+  net.Add(GenReLUConf("relu2"));
+  net.Add(GenPoolingConf("pool2", false, 3, 2, 1));
+  net.Add(GenLRNConf("lrn2"));
+  net.Add(GenConvConf("conv3", 64, 5, 1, 2, 0.01));
+  net.Add(GenReLUConf("relu3"));
+  net.Add(GenPoolingConf("pool3", false, 3, 2, 1));
+  net.Add(GenFlattenConf("flat"));
+  net.Add(GenDenseConf("ip", 10, 0.01, 250));
   return net;
 }
 
@@ -228,35 +223,18 @@ void Train(float lr, int num_epoch, string data_dir) {
   mem_conf.add_device(0);
   mem_conf.add_device(1);
   std::shared_ptr<DeviceMemPool> mem_pool(new CnMemPool(mem_conf));
-  std::shared_ptr<CudaGPU> cuda_1(new CudaGPU(0, mem_pool));
-  std::shared_ptr<CudaGPU> cuda_2(new CudaGPU(1, mem_pool));
-  net_1.ToDevice(cuda_1);
-  net_2.ToDevice(cuda_2);
-
-  /*
-  // this does not work for net_2
-  train_x_2.ResetLike(train_x);
-  train_y_2.ResetLike(train_y);
-  test_x_2.ResetLike(test_x);
-  test_y_2.ResetLike(test_y);
-
-  train_x.ToDevice(cuda_1);
-  train_y.ToDevice(cuda_1);
-  test_x.ToDevice(cuda_1);
-  test_y.ToDevice(cuda_1);
+  std::shared_ptr<CudaGPU> dev_1(new CudaGPU(0, mem_pool));
+  std::shared_ptr<CudaGPU> dev_2(new CudaGPU(1, mem_pool));
 
-  train_x_2.ToDevice(cuda_2);
-  train_y_2.ToDevice(cuda_2);
-  test_x_2.ToDevice(cuda_2);
-  test_y_2.ToDevice(cuda_2);
-  */
+  net_1.ToDevice(dev_1);
+  net_2.ToDevice(dev_2);
 
-  train_x_1.ToDevice(cuda_1);
-  train_y_1.ToDevice(cuda_1);
-  test_x.ToDevice(cuda_1);
-  test_y.ToDevice(cuda_1);
-  train_x_2.ToDevice(cuda_2);
-  train_y_2.ToDevice(cuda_2);
+  train_x_1.ToDevice(dev_1);
+  train_y_1.ToDevice(dev_1);
+  test_x.ToDevice(dev_1);
+  test_y.ToDevice(dev_1);
+  train_x_2.ToDevice(dev_2);
+  train_y_2.ToDevice(dev_2);
 
   // net.Train(100, num_epoch, train_x, train_y, test_x, test_y);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index 6480557..e1363e4 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -26,19 +26,14 @@
 #include "singa/model/metric.h"
 #include "singa/utils/channel.h"
 #include "singa/utils/string.h"
-#include "../../src/model/layer/cudnn_convolution.h"
-#include "../../src/model/layer/cudnn_activation.h"
-#include "../../src/model/layer/cudnn_pooling.h"
-#include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/dense.h"
-#include "../../src/model/layer/flatten.h"
 namespace singa {
 
+const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnConvolution");
+  conf.set_type(engine + "_convolution");
   ConvolutionConf *conv = conf.mutable_convolution_conf();
   conv->set_num_output(nb_filter);
   conv->add_kernel_size(kernel);
@@ -63,7 +58,7 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
                          int pad) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnPooling");
+  conf.set_type(engine + "_pooling");
   PoolingConf *pool = conf.mutable_pooling_conf();
   pool->set_kernel_size(kernel);
   pool->set_stride(stride);
@@ -75,14 +70,14 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
 LayerConf GenReLUConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("RELU");
+  conf.set_type(engine + "_relu");
   return conf;
 }
 
 LayerConf GenDenseConf(string name, int num_output, float std, float wd) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Dense");
+  conf.set_type("singa_dense");
   DenseConf *dense = conf.mutable_dense_conf();
   dense->set_num_output(num_output);
 
@@ -104,7 +99,7 @@ LayerConf GenDenseConf(string name, int num_output, float std, float wd) {
 LayerConf GenLRNConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnLRN");
+  conf.set_type(engine + "_lrn");
   LRNConf *lrn = conf.mutable_lrn_conf();
   lrn->set_local_size(3);
   lrn->set_alpha(5e-05);
@@ -115,7 +110,7 @@ LayerConf GenLRNConf(string name) {
 LayerConf GenFlattenConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Flatten");
+  conf.set_type("singa_flatten");
   return conf;
 }
 
@@ -123,20 +118,19 @@ FeedForwardNet CreateNet() {
   FeedForwardNet net;
   Shape s{3, 32, 32};
 
-  net.Add(new CudnnConvolution(), GenConvConf("conv1", 32, 5, 1, 2, 0.0001),
-          &s);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 3, 2, 1));
-  net.Add(new CudnnActivation(), GenReLUConf("relu1"));
-  net.Add(new CudnnLRN(), GenLRNConf("lrn1"));
-  net.Add(new CudnnConvolution(), GenConvConf("conv2", 32, 5, 1, 2, 0.01));
-  net.Add(new CudnnActivation(), GenReLUConf("relu2"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool2", false, 3, 2, 1));
-  net.Add(new CudnnLRN(), GenLRNConf("lrn2"));
-  net.Add(new CudnnConvolution, GenConvConf("conv3", 64, 5, 1, 2, 0.01));
-  net.Add(new CudnnActivation(), GenReLUConf("relu3"));
-  net.Add(new CudnnPooling(), GenPoolingConf("pool3", false, 3, 2, 1));
-  net.Add(new Flatten(), GenFlattenConf("flat"));
-  net.Add(new Dense(), GenDenseConf("ip", 10, 0.01, 250));
+  net.Add(GenConvConf("conv1", 32, 5, 1, 2, 0.0001), &s);
+  net.Add(GenPoolingConf("pool1", true, 3, 2, 1));
+  net.Add(GenReLUConf("relu1"));
+  net.Add(GenLRNConf("lrn1"));
+  net.Add(GenConvConf("conv2", 32, 5, 1, 2, 0.01));
+  net.Add(GenReLUConf("relu2"));
+  net.Add(GenPoolingConf("pool2", false, 3, 2, 1));
+  net.Add(GenLRNConf("lrn2"));
+  net.Add(GenConvConf("conv3", 64, 5, 1, 2, 0.01));
+  net.Add(GenReLUConf("relu3"));
+  net.Add(GenPoolingConf("pool3", false, 3, 2, 1));
+  net.Add(GenFlattenConf("flat"));
+  net.Add(GenDenseConf("ip", 10, 0.01, 250));
   return net;
 }
 
@@ -184,12 +178,12 @@ void Train(float lr, int num_epoch, string data_dir) {
   Accuracy acc;
   net.Compile(true, &sgd, &loss, &acc);
 
-  auto cuda = std::make_shared<CudaGPU>();
-  net.ToDevice(cuda);
-  train_x.ToDevice(cuda);
-  train_y.ToDevice(cuda);
-  test_x.ToDevice(cuda);
-  test_y.ToDevice(cuda);
+  auto dev = std::make_shared<CudaGPU>();
+  net.ToDevice(dev);
+  train_x.ToDevice(dev);
+  train_y.ToDevice(dev);
+  test_x.ToDevice(dev);
+  test_y.ToDevice(dev);
   net.Train(100, num_epoch, train_x, train_y, test_x, test_y);
 }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 96c339a..9ed5599 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -31,7 +31,10 @@ from singa import loss
 from singa import net as ffnet
 
 
-def create_net():
+def create_net(use_cpu=False):
+    if use_cpu:
+        layer.engine = 'singa'
+
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index cb4110d..3285651 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -96,16 +96,23 @@ def alexnet_lr(epoch):
         return 0.00001
 
 
-def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
+def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
+          use_cpu=False):
     print 'Start intialization............'
-    cuda = device.create_cuda_gpu()
-    net.to_device(cuda)
+    if use_cpu:
+        print 'Using CPU'
+        dev = device.get_default_device()
+    else:
+        print 'Using GPU'
+        dev = device.create_cuda_gpu()
+
+    net.to_device(dev)
     opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         opt.register(p, specs)
 
-    tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
-    ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
+    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
+    ty = tensor.Tensor((batch_size,), dev, core_pb2.kInt)
     train_x, train_y, test_x, test_y = data
     num_train_batch = train_x.shape[0] / batch_size
     num_test_batch = test_x.shape[0] / batch_size
@@ -127,7 +134,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
             # update progress bar
             utils.update_progress(b * 1.0 / num_train_batch,
                                   'training loss = %f, accuracy = %f' % (l, a))
-        info = 'training loss = %f, training accuracy = %f' \
+        info = '\ntraining loss = %f, training accuracy = %f' \
             % (loss / num_train_batch, acc / num_train_batch)
         print info
 
@@ -146,9 +153,11 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
     net.save('model.bin')  # save model params into checkpoint file
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Train vgg/alexnet for cifar10')
+    parser = argparse.ArgumentParser(description='Train vgg/alexnet for '
+                                     'cifar10 dataset')
     parser.add_argument('model', choices=['vgg', 'alexnet'], default='alexnet')
     parser.add_argument('data', default='cifar-10-batches-py')
+    parser.add_argument('--use_cpu', action='store_true')
     args = parser.parse_args()
     assert os.path.exists(args.data), \
         'Pls download the cifar10 dataset via "download_data.py py"'
@@ -157,9 +166,11 @@ if __name__ == '__main__':
     test_x, test_y = load_test_data(args.data)
     if args.model == 'alexnet':
         train_x, test_x = normalize_for_alexnet(train_x, test_x)
-        net = alexnet.create_net()
-        train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004)
+        net = alexnet.create_net(args.use_cpu)
+        train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004,
+              use_cpu=args.use_cpu)
     else:
         train_x, test_x = normalize_for_vgg(train_x, test_x)
-        net = vgg.create_net()
-        train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005)
+        net = vgg.create_net(args.use_cpu)
+        train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005,
+              use_cpu=args.use_cpu)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index c6b7fa1..149cb21 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -28,27 +28,20 @@
 #include "singa/utils/channel.h"
 #include "singa/utils/string.h"
 #include "singa/core/memory.h"
-#include "../../src/model/layer/cudnn_convolution.h"
-#include "../../src/model/layer/cudnn_activation.h"
-#include "../../src/model/layer/cudnn_pooling.h"
-#include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/dropout.h"
-#include "../../src/model/layer/cudnn_batchnorm.h"
-#include "../../src/model/layer/dense.h"
-#include "../../src/model/layer/flatten.h"
 #include <thread>
 #include <memory>
 #include <cmath>
 
 namespace singa {
 
+const std::string engine = "cudnn";
 const float default_wd  = 0.0005f;
 
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std = .02f, float bias = .0f) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnConvolution");
+  conf.set_type(engine + "_convolution");
   ConvolutionConf *conv = conf.mutable_convolution_conf();
   conv->set_num_output(nb_filter);
   conv->add_kernel_size(kernel);
@@ -75,7 +68,7 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
                          int pad) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnPooling");
+  conf.set_type(engine + "_pooling");
   PoolingConf *pool = conf.mutable_pooling_conf();
   pool->set_kernel_size(kernel);
   pool->set_stride(stride);
@@ -87,14 +80,14 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
 LayerConf GenReLUConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("RELU");
+  conf.set_type(engine + "_relu");
   return conf;
 }
 
 LayerConf GenDenseConf(string name, int num_output, float std, float wd = default_wd) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Dense");
+  conf.set_type("singa_dense");
   DenseConf *dense = conf.mutable_dense_conf();
   dense->set_num_output(num_output);
 
@@ -116,14 +109,14 @@ LayerConf GenDenseConf(string name, int num_output, float std, float wd = defaul
 LayerConf GenFlattenConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Flatten");
+  conf.set_type("singa_flatten");
   return conf;
 }
 
 LayerConf GenBatchNormConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnBatchNorm");
+  conf.set_type(engine + "_batchnorm");
   ParamSpec *gammaspec = conf.add_param();
   gammaspec->set_name(name + "_gamma");
   auto gammafill = gammaspec->mutable_filler();
@@ -155,7 +148,7 @@ LayerConf GenBatchNormConf(string name) {
 LayerConf GenDropoutConf(string name, float dropout_ratio) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Dropout");
+  conf.set_type(engine + "_dropout");
   DropoutConf *dropout = conf.mutable_dropout_conf();
   dropout->set_dropout_ratio(dropout_ratio);
 
@@ -163,47 +156,47 @@ LayerConf GenDropoutConf(string name, float dropout_ratio) {
 }
 
 void ConvBNReLU(FeedForwardNet& net, string name, int nb_filter, Shape* shape = nullptr) {
-  net.Add(new CudnnConvolution(), GenConvConf(name+"_conv", nb_filter, 3, 1, 1), shape);
-  net.Add(new CudnnBatchNorm(), GenBatchNormConf(name+"_bn"));
-  net.Add(new CudnnActivation(), GenReLUConf(name+"_relu"));
+  net.Add(GenConvConf(name+"_conv", nb_filter, 3, 1, 1), shape);
+  net.Add(GenBatchNormConf(name+"_bn"));
+  net.Add(GenReLUConf(name+"_relu"));
 }
 
 FeedForwardNet CreateNet() {
   FeedForwardNet net;
   Shape s{3, 32, 32};
   ConvBNReLU(net, "conv1_1", 64, &s);
-  net.Add(new Dropout(), GenDropoutConf("drop1", 0.3));
+  net.Add(GenDropoutConf("drop1", 0.3));
   ConvBNReLU(net, "conv1_2", 64);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 2, 2, 0));
+  net.Add(GenPoolingConf("pool1", true, 2, 2, 0));
   ConvBNReLU(net, "conv2_1", 128);
-  net.Add(new Dropout(), GenDropoutConf("drop2", 0.4));
+  net.Add(GenDropoutConf("drop2", 0.4));
   ConvBNReLU(net, "conv2_2", 128);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 2, 2, 0));
+  net.Add(GenPoolingConf("pool2", true, 2, 2, 0));
   ConvBNReLU(net, "conv3_1", 256);
-  net.Add(new Dropout(), GenDropoutConf("drop3_1", 0.4));
+  net.Add(GenDropoutConf("drop3_1", 0.4));
   ConvBNReLU(net, "conv3_2", 256);
-  net.Add(new Dropout(), GenDropoutConf("drop3_2", 0.4));
+  net.Add(GenDropoutConf("drop3_2", 0.4));
   ConvBNReLU(net, "conv3_3", 256);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool3", true, 2, 2, 0));
+  net.Add(GenPoolingConf("pool3", true, 2, 2, 0));
   ConvBNReLU(net, "conv4_1", 512);
-  net.Add(new Dropout(), GenDropoutConf("drop4_1", 0.4));
+  net.Add(GenDropoutConf("drop4_1", 0.4));
   ConvBNReLU(net, "conv4_2", 512);
-  net.Add(new Dropout(), GenDropoutConf("drop4_2", 0.4));
+  net.Add(GenDropoutConf("drop4_2", 0.4));
   ConvBNReLU(net, "conv4_3", 512);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool4", true, 2, 2, 0));
+  net.Add(GenPoolingConf("pool4", true, 2, 2, 0));
   ConvBNReLU(net, "conv5_1", 512);
-  net.Add(new Dropout(), GenDropoutConf("drop5_1", 0.4));
+  net.Add(GenDropoutConf("drop5_1", 0.4));
   ConvBNReLU(net, "conv5_2", 512);
-  net.Add(new Dropout(), GenDropoutConf("drop5_2", 0.4));
+  net.Add(GenDropoutConf("drop5_2", 0.4));
   ConvBNReLU(net, "conv5_3", 512);
-  net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 2, 2, 0));
-  net.Add(new Flatten(), GenFlattenConf("flat"));
-  net.Add(new Dropout(), GenDropoutConf("flat_drop", 0.5));
-  net.Add(new Dense(), GenDenseConf("ip1", 512, 0.02));
-  net.Add(new CudnnBatchNorm(), GenBatchNormConf("ip1_bn"));
-  net.Add(new CudnnActivation(), GenReLUConf("ip1_relu"));
-  net.Add(new Dropout(), GenDropoutConf("ip1_drop", 0.5));
-  net.Add(new Dense(), GenDenseConf("ip2", 10, 0.02));
+  net.Add(GenPoolingConf("pool5", true, 2, 2, 0));
+  net.Add(GenFlattenConf("flat"));
+  net.Add(GenDropoutConf("flat_drop", 0.5));
+  net.Add(GenDenseConf("ip1", 512, 0.02));
+  net.Add(GenBatchNormConf("ip1_bn"));
+  net.Add(GenReLUConf("ip1_relu"));
+  net.Add(GenDropoutConf("ip1_drop", 0.5));
+  net.Add(GenDenseConf("ip2", 10, 0.02));
 
   return net;
 }
@@ -294,17 +287,17 @@ void Train(float lr, int num_epoch, string data_dir) {
   mem_conf.add_device(0);
   mem_conf.add_device(1);
   std::shared_ptr<DeviceMemPool> mem_pool(new CnMemPool(mem_conf));
-  std::shared_ptr<CudaGPU> cuda_1(new CudaGPU(0, mem_pool));
-  std::shared_ptr<CudaGPU> cuda_2(new CudaGPU(1, mem_pool));
-  net_1.ToDevice(cuda_1);
-  net_2.ToDevice(cuda_2);
-
-  train_x_1.ToDevice(cuda_1);
-  train_y_1.ToDevice(cuda_1);
-  test_x.ToDevice(cuda_1);
-  test_y.ToDevice(cuda_1);
-  train_x_2.ToDevice(cuda_2);
-  train_y_2.ToDevice(cuda_2);
+  std::shared_ptr<CudaGPU> dev_1(new CudaGPU(0, mem_pool));
+  std::shared_ptr<CudaGPU> dev_2(new CudaGPU(1, mem_pool));
+  net_1.ToDevice(dev_1);
+  net_2.ToDevice(dev_2);
+
+  train_x_1.ToDevice(dev_1);
+  train_y_1.ToDevice(dev_1);
+  test_x.ToDevice(dev_1);
+  test_y.ToDevice(dev_1);
+  train_x_2.ToDevice(dev_2);
+  train_y_2.ToDevice(dev_2);
 
   LOG(INFO) << "Launching thread...";
   std::thread t1 =

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 0b9bb56..97e690c 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -40,40 +40,42 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
     net.add(layer.Activation(name + '_3'))
 
 
-def create_net():
+def create_net(use_cpu=False):
+    if use_cpu:
+        layer.engine = 'singa'
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
-    net.add(layer.Dropout('drop1', 0.3, engine='cuda'))
+    net.add(layer.Dropout('drop1', 0.3))
     ConvBnReLU(net, 'conv1_2', 64)
     net.add(layer.MaxPooling2D('pool1', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv2_1', 128)
-    net.add(layer.Dropout('drop2_1', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop2_1', 0.4))
     ConvBnReLU(net, 'conv2_2', 128)
     net.add(layer.MaxPooling2D('pool2', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv3_1', 256)
-    net.add(layer.Dropout('drop3_1', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop3_1', 0.4))
     ConvBnReLU(net, 'conv3_2', 256)
-    net.add(layer.Dropout('drop3_2', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop3_2', 0.4))
     ConvBnReLU(net, 'conv3_3', 256)
     net.add(layer.MaxPooling2D('pool3', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv4_1', 512)
-    net.add(layer.Dropout('drop4_1', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop4_1', 0.4))
     ConvBnReLU(net, 'conv4_2', 512)
-    net.add(layer.Dropout('drop4_2', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop4_2', 0.4))
     ConvBnReLU(net, 'conv4_3', 512)
     net.add(layer.MaxPooling2D('pool4', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv5_1', 512)
-    net.add(layer.Dropout('drop5_1', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop5_1', 0.4))
     ConvBnReLU(net, 'conv5_2', 512)
-    net.add(layer.Dropout('drop5_2', 0.4, engine='cuda'))
+    net.add(layer.Dropout('drop5_2', 0.4))
     ConvBnReLU(net, 'conv5_3', 512)
     net.add(layer.MaxPooling2D('pool5', 2, 2, border_mode='valid'))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dropout('drop_flat', 0.5, engine='cuda'))
+    net.add(layer.Dropout('drop_flat', 0.5))
     net.add(layer.Dense('ip1', 512))
     net.add(layer.BatchNormalization('batchnorm_ip1'))
     net.add(layer.Activation('relu_ip1'))
-    net.add(layer.Dropout('drop_ip2', 0.5, engine='cuda'))
+    net.add(layer.Dropout('drop_ip2', 0.5))
     net.add(layer.Dense('ip2', 10))
     print 'Start intialization............'
     for (p, name) in zip(net.param_values(), net.param_names()):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/examples/imagenet/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet.cc b/examples/imagenet/alexnet.cc
index 270312c..3fb5d04 100644
--- a/examples/imagenet/alexnet.cc
+++ b/examples/imagenet/alexnet.cc
@@ -22,13 +22,6 @@
 #include "singa/singa_config.h"
 #ifdef USE_OPENCV
 #include <cmath>
-#include "../../src/model/layer/cudnn_activation.h"
-#include "../../src/model/layer/cudnn_convolution.h"
-#include "../../src/model/layer/dropout.h"
-#include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/cudnn_pooling.h"
-#include "../../src/model/layer/dense.h"
-#include "../../src/model/layer/flatten.h"
 #include "./ilsvrc12.h"
 #include "singa/io/snapshot.h"
 #include "singa/model/feed_forward_net.h"
@@ -40,11 +33,12 @@
 #include "singa/utils/timer.h"
 namespace singa {
 
+const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std, float bias = .0f) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnConvolution");
+  conf.set_type(engine + "_convolution");
   ConvolutionConf *conv = conf.mutable_convolution_conf();
   conv->set_num_output(nb_filter);
   conv->add_kernel_size(kernel);
@@ -71,7 +65,7 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
                          int pad) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnPooling");
+  conf.set_type(engine + "_pooling");
   PoolingConf *pool = conf.mutable_pooling_conf();
   pool->set_kernel_size(kernel);
   pool->set_stride(stride);
@@ -83,7 +77,7 @@ LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
 LayerConf GenReLUConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("RELU");
+  conf.set_type(engine + "_relu");
   return conf;
 }
 
@@ -91,7 +85,7 @@ LayerConf GenDenseConf(string name, int num_output, float std, float wd,
                        float bias = .0f) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Dense");
+  conf.set_type("singa_dense");
   DenseConf *dense = conf.mutable_dense_conf();
   dense->set_num_output(num_output);
 
@@ -115,7 +109,7 @@ LayerConf GenDenseConf(string name, int num_output, float std, float wd,
 LayerConf GenLRNConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnLRN");
+  conf.set_type(engine + "_lrn");
   LRNConf *lrn = conf.mutable_lrn_conf();
   lrn->set_local_size(5);
   lrn->set_alpha(1e-04);
@@ -126,14 +120,14 @@ LayerConf GenLRNConf(string name) {
 LayerConf GenFlattenConf(string name) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Flatten");
+  conf.set_type("singa_flatten");
   return conf;
 }
 
 LayerConf GenDropoutConf(string name, float dropout_ratio) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("Dropout");
+  conf.set_type(engine + "_dropout");
   DropoutConf *dropout = conf.mutable_dropout_conf();
   dropout->set_dropout_ratio(dropout_ratio);
   return conf;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 778a130..4c46114 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -321,23 +321,33 @@ public:
   /// Return a string containing all hardware info, e.g., version, memory size.
   static const std::string DeviceQuery(int id, bool verbose = false);
 
+  /// Return the defualt host device
+  static std::shared_ptr<Device> GetDefaultDevice() {
+    return defaultDevice;
+  }
+
   /// Create a set of CudaGPU Device using 'num_devices' free GPUs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
 
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
-  CreateCudaGPUs(const std::vector<int> &devices, size_t init_size = 0);
-
-  /// Create a \p num_devices set of valid OpenCL devices, regardless of platforms.
-  /// If there are fewer valid devices than requested, then this method will return as many as possible.
-  /// If OpenCL is not in use, this method will return an empty array.
-  const std::vector<std::shared_ptr<Device>> CreateOpenclDevices(const size_t num_devices);
-
-  /// Create a set of valid OpenCL devices, regardless of platforms, assigning \p id to each device in sequence.
-  /// If there are fewer valid devices than requested, then this method will return as many as possible.
+  CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+
+  /// Create a \p num_devices set of valid OpenCL devices, regardless of
+  /// platforms.  If there are fewer valid devices than requested, then this
+  /// method will return as many as possible.If OpenCL is not in use, this
+  /// method will return an empty array.
+  const std::vector<std::shared_ptr<Device> > CreateOpenclDevices(
+             const size_t num_devices);
+
+  /// Create a set of valid OpenCL devices, regardless of platforms, assigning
+  /// \p id to each device in sequence.
+  /// If there are fewer valid devices than requested, then this method will
+  /// return as many as possible.
   /// If OpenCL is not in use, this method will return an empty array.
-  const std::vector<std::shared_ptr<Device>> CreateOpenclDevices(const vector<int>& id);
+  const std::vector<std::shared_ptr<Device> >
+  CreateOpenclDevices(const vector<int> &id);
 
   /// This function is implementd by Caffe (http://caffe.berkeleyvision.org/).
   /// This function checks the availability of GPU #device_id.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/include/singa/model/feed_forward_net.h
----------------------------------------------------------------------
diff --git a/include/singa/model/feed_forward_net.h b/include/singa/model/feed_forward_net.h
index 8adc259..1bf112c 100644
--- a/include/singa/model/feed_forward_net.h
+++ b/include/singa/model/feed_forward_net.h
@@ -39,7 +39,7 @@ class FeedForwardNet {
   ///    following the topological order.
   /// 2. this layer has already been setup (Setup function is called outside).
   /// The layer will be freed in the destructor of FeedForwardNet.
-  Layer* Add(Layer* layer);
+  std::shared_ptr<Layer> Add(std::shared_ptr<Layer> layer);
 
   // TODO(wangwei) add ConcatenateLayer and SliceLayer
   // AddConcatenateLayer(vector<Layer*> src, Layer *dst);
@@ -49,11 +49,9 @@ class FeedForwardNet {
   /// Assume the layer is added in corret order.
   /// For the first layer, 'sample_shape' (the input sample shape) is necessary
   /// for calling Setup().
-  Layer* Add(const LayerConf& conf, const Shape* sample_shape = nullptr);
+  std::shared_ptr<Layer> Add(const LayerConf& conf,
+      const Shape* sample_shape = nullptr);
 
-  /// Add a layer, and call its Setup function.
-  Layer* Add(Layer* layer, const LayerConf& conf,
-             const Shape* sample_shape = nullptr);
   /// Set some fields used for training and evaluating the neural net.
   /// This method will instantiate an Updater ,then wrap the Optimier into
   /// Updater and always register the parameters of the net instance.
@@ -147,13 +145,13 @@ class FeedForwardNet {
     return std::thread([=]() { Train(batchsize, nb_epoch, x, y); });
   }
 
-  const vector<Layer*> layers() const { return layers_; }
+  const vector<std::shared_ptr<Layer>> layers() const { return layers_; }
   const vector<string> GetParamNames() const;
   const vector<ParamSpec> GetParamSpecs() const;
   const vector<Tensor> GetParamValues() const;
 
  protected:
-  vector<Layer*> layers_;
+  vector<std::shared_ptr<Layer>> layers_;
   std::shared_ptr<Updater> updater_;
   Loss* loss_;
   Metric* metric_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index d31bd95..58f0f4b 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -222,8 +222,8 @@ class Layer {
   vector<ParamSpec> param_specs_;
 };
 
-#define RegisterLayerClass(SubLayer) \
-  static Registra<Layer, SubLayer> _##SubLayer##Layer(#SubLayer);
+#define RegisterLayerClass(Name, SubLayer) \
+  static Registra<Layer, SubLayer> Name##SubLayer(#Name);
 
 inline std::shared_ptr<Layer> CreateLayer(const std::string type) {
   std::shared_ptr<Layer> layer(Factory<Layer>::Create(type));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/include/singa/utils/integer.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/integer.h b/include/singa/utils/integer.h
new file mode 100644
index 0000000..9c2799d
--- /dev/null
+++ b/include/singa/utils/integer.h
@@ -0,0 +1,73 @@
+/************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *************************************************************/
+
+#ifndef INTEGER_H_
+#define INTEGER_H_
+
+#include <cstdint>
+
+namespace singa{
+static bool isNetworkOrder() {
+    int test = 1;
+    return (1 != *(uint8_t*)&test);
+}
+
+template <typename T>
+static inline T byteSwap(const T& v) {
+    int size = sizeof(v);
+    T ret;
+    uint8_t *dest = reinterpret_cast<uint8_t *>(&ret);
+    uint8_t *src = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(&v));
+    for (int i = 0; i < size; ++i) {
+        dest[i] = src[size - i - 1];
+    }
+    return ret;
+}
+
+template <typename T>
+static inline T hton(const T& v)
+{
+    return isNetworkOrder() ? v : byteSwap(v);
+}
+
+template <typename T>
+static inline T ntoh(const T& v) 
+{
+    return hton(v);
+}
+
+static inline int appendInteger(char* buf) {return 0;}
+static inline int readInteger(char* buf) {return 0;}
+
+template<typename Type, typename... Types>
+static int appendInteger(char* buf, Type value, Types... values) {
+    *(Type*)buf = hton(value);
+    return sizeof(Type) + appendInteger(buf + sizeof(Type), values...);
+}
+
+template<typename Type, typename... Types>
+static int readInteger(char* buf, Type& value, Types&... values) {
+    value = ntoh(*(Type*)buf);
+    return sizeof(Type) + readInteger(buf + sizeof(Type), values...);
+}
+
+}
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/core/device/platform.cc
----------------------------------------------------------------------
diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc
index a4561de..a3661f2 100644
--- a/src/core/device/platform.cc
+++ b/src/core/device/platform.cc
@@ -113,11 +113,11 @@ Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) {
   const vector<int> gpus = GetGPUIDs();
   CHECK_LE(num_devices, gpus.size());
   vector<int> use_gpus(gpus.begin(), gpus.begin() + num_devices);
-  return CreateCudaGPUs(use_gpus, init_size);
+  return CreateCudaGPUsOn(use_gpus, init_size);
 }
 
 const vector<shared_ptr<Device> >
-Platform::CreateCudaGPUs(const vector<int> &devices, size_t init_size) {
+Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
   MemPoolConf conf;
   if (init_size > 0)
     conf.set_init_size(init_size);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index e260f9e..dfb1eb2 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -452,7 +452,7 @@ float Tensor::L1() const {
   float nrm = 0.0f;
   TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, {
     device_->Exec([&nrm, this](Context *ctx) {
-      DType ret;
+      DType ret = DType(0);
       Asum<DType, Lang>(this->Size(), this->block(), &ret, ctx);
       nrm = TypeCast<DType, float>(ret);
     }, {this->block()}, {});
@@ -465,7 +465,7 @@ float Tensor::L2() const {
   float nrm = 0.0f;
   TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, {
     device_->Exec([&nrm, this](Context *ctx) {
-      DType ret;
+      DType ret = DType(0);
       Nrm2<DType, Lang>(this->Size(), this->block(), &ret, ctx);
       nrm = TypeCast<DType, float>(ret);
     }, {this->block()}, {});

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index 941931d..a2802d5 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -239,7 +239,7 @@ void Sqrt<float, lang::Cpp>(const size_t num, const Block *in, Block *out,
   float *outPtr = static_cast<float *>(out->mutable_data());
   const float *inPtr = static_cast<const float *>(in->data());
   for (size_t i = 0; i < num; i++) {
-    CHECK_GT(inPtr[i], 0.f);
+    CHECK_GE(inPtr[i], 0.f);
     outPtr[i] = sqrt(inPtr[i]);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/feed_forward_net.cc
----------------------------------------------------------------------
diff --git a/src/model/feed_forward_net.cc b/src/model/feed_forward_net.cc
index 9450c9e..514d6e2 100644
--- a/src/model/feed_forward_net.cc
+++ b/src/model/feed_forward_net.cc
@@ -26,23 +26,16 @@
 namespace singa {
 
 FeedForwardNet::~FeedForwardNet() {
-  for (auto layer : layers_) delete layer;
-}
-Layer* FeedForwardNet::Add(Layer* layer) {
-  layers_.push_back(layer);
-  return layer;
 }
 
-Layer* FeedForwardNet::Add(const LayerConf& conf, const Shape* sample_shape) {
-  CHECK(sample_shape != nullptr || layers_.size())
-      << "Must provide the input sample shape for the first layer";
-  Layer* layer = nullptr;  // TODO(wangwei) use CreateLayer(conf.type());
-  Add(layer, conf, sample_shape);
+std::shared_ptr<Layer> FeedForwardNet::Add(std::shared_ptr<Layer> layer) {
+  layers_.push_back(layer);
   return layer;
 }
 
-Layer* FeedForwardNet::Add(Layer* layer, const LayerConf& conf,
-                           const Shape* sample_shape) {
+std::shared_ptr<Layer> FeedForwardNet::Add(const LayerConf& conf,
+    const Shape* sample_shape) {
+  std::shared_ptr<Layer> layer(CreateLayer(conf.type()));
   CHECK(conf.has_name()) << "Must set layer name";
   if (sample_shape == nullptr)
     layer->Setup(layers_.back()->GetOutputSampleShape(), conf);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.cc b/src/model/layer/activation.cc
index 2497c31..aa40edb 100644
--- a/src/model/layer/activation.cc
+++ b/src/model/layer/activation.cc
@@ -18,14 +18,23 @@
 
 #include "singa/model/layer.h"
 #include "./activation.h"
+#include "singa/utils/string.h"
 namespace singa {
 
-RegisterLayerClass(Activation);
+RegisterLayerClass(singa_relu, Activation);
+RegisterLayerClass(singa_sigmoid, Activation);
+RegisterLayerClass(singa_tanh, Activation);
 
 void Activation::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
-  mode_ = conf.type();
-  if (mode_ == "RELU") {
+  auto pos = conf.type().find_first_of('_');
+  CHECK_NE(pos, string::npos) << "There should be a '_' in the laye type "
+    << conf.type();
+  mode_ = ToLowerCase(conf.type().substr(pos + 1));
+  if (mode_ != "relu" && mode_ != "sigmoid" && mode_ != "tanh")
+    LOG(FATAL) << "Unkown activation type: " << conf.type() << " " << mode_
+      << ". Please use singa_relu, singa_sigmoid, or singa_tanh";
+  if (mode_ == "relu") {
     neg_slope_ = conf.relu_conf().negative_slope();
   }
   out_sample_shape_ = in_sample;
@@ -33,13 +42,13 @@ void Activation::Setup(const Shape& in_sample, const LayerConf& conf) {
 
 const Tensor Activation::Forward(int flag, const Tensor& input) {
   Tensor output;
-  if (mode_ == "SIGMOID") {
+  if (mode_ == "sigmoid") {
     output = Sigmoid(input);
     if (flag & kTrain) buf_.push(output);
-  } else if (mode_ == "TANH") {
+  } else if (mode_ == "tanh") {
     output = Tanh(input);
     if (flag & kTrain) buf_.push(output);
-  } else if (mode_ == "RELU") {
+  } else if (mode_ == "relu") {
     output = ReLU(input);
     if (flag & kTrain) buf_.push(input);
   } else
@@ -55,11 +64,11 @@ const std::pair<Tensor, vector<Tensor>> Activation::Backward(
   // activation.
   Tensor input_grad, inout = buf_.top();
   buf_.pop();
-  if (mode_ == "SIGMOID")
+  if (mode_ == "sigmoid")
     input_grad = grad * inout * (inout * (-1.f) + 1.f);
-  else if (mode_ == "TANH")
+  else if (mode_ == "tanh")
     input_grad = grad * (inout * inout * (-1.f) + 1.f);
-  else if (mode_ == "RELU")
+  else if (mode_ == "relu")
     input_grad = grad * (inout > 0.f) + (inout <= 0.f) * neg_slope_;
   else LOG(FATAL) << "Unkown activation: " << mode_;
   return std::make_pair(input_grad, param_grad);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/activation.h
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.h b/src/model/layer/activation.h
index e3fb657..7d15979 100644
--- a/src/model/layer/activation.h
+++ b/src/model/layer/activation.h
@@ -26,7 +26,7 @@ namespace singa {
 class Activation : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Activation"; }
+  // const std::string layer_type() const override { return "Activation"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index 6ea9f2a..f348661 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -21,7 +21,7 @@
 #include "batchnorm.h"
 
 namespace singa {
-RegisterLayerClass(BatchNorm);
+RegisterLayerClass(singa_batchnorm, BatchNorm);
 void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;
@@ -78,8 +78,8 @@ const Tensor BatchNorm::Forward(int flag, const Tensor& input) {
     runningVariance_ *= 1.0f - factor_;
     Axpy(factor_, var, &runningVariance_);
     Tensor tmp = var.Clone();
-    tmp += 1e-6f;
     tmp = Sqrt(tmp);
+    tmp += 1e-6f;
     xnorm = x.Clone();
     SubRow(mean, &xnorm);
     DivRow(tmp, &xnorm);
@@ -94,8 +94,8 @@ const Tensor BatchNorm::Forward(int flag, const Tensor& input) {
     xnorm = x.Clone();
     SubRow(runningMean_, &xnorm);
     Tensor tmp = runningVariance_.Clone();
-    tmp += 1e-6f;
     tmp = Sqrt(tmp);
+    tmp += 1e-6f;
     DivRow(tmp, &xnorm);
     output = xnorm.Clone();
     MultRow(bnScale_, &output);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/batchnorm.h
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.h b/src/model/layer/batchnorm.h
index f3d83ab..c2cfde9 100644
--- a/src/model/layer/batchnorm.h
+++ b/src/model/layer/batchnorm.h
@@ -29,7 +29,7 @@ namespace singa {
 class BatchNorm : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "BatchNorm"; }
+  // const std::string layer_type() const override { return "BatchNorm"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 1bf6b39..4fc209f 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -23,7 +23,7 @@
 namespace singa {
 using std::vector;
 
-RegisterLayerClass(Convolution);
+RegisterLayerClass(singa_convolution, Convolution);
 void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   ConvolutionConf conv_conf = conf.convolution_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/convolution.h
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.h b/src/model/layer/convolution.h
index 1383a66..d85a17b 100644
--- a/src/model/layer/convolution.h
+++ b/src/model/layer/convolution.h
@@ -27,7 +27,7 @@ namespace singa {
 class Convolution : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Convolution"; }
+  // const std::string layer_type() const override { return "Convolution"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const vector<size_t>& in_shape, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc
index c86539d..4ecb375 100644
--- a/src/model/layer/cudnn_activation.cc
+++ b/src/model/layer/cudnn_activation.cc
@@ -25,7 +25,9 @@
 #include "singa/utils/logging.h"
 
 namespace singa {
-RegisterLayerClass(CudnnActivation);
+RegisterLayerClass(cudnn_relu, CudnnActivation);
+RegisterLayerClass(cudnn_sigmoid, CudnnActivation);
+RegisterLayerClass(cudnn_tanh, CudnnActivation);
 CudnnActivation::~CudnnActivation() {
   if (acti_desc_ != nullptr)
     CUDNN_CHECK(cudnnDestroyActivationDescriptor(acti_desc_));
@@ -40,11 +42,11 @@ void CudnnActivation::InitCudnn(size_t size, DataType dtype) {
   CUDNN_CHECK(cudnnSetTensor4dDescriptor(
       desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), 1, 1, 1, size));
 
-  if (mode_ == "SIGMOID")
+  if (mode_ == "sigmoid")
     cudnn_mode_ = CUDNN_ACTIVATION_SIGMOID;
-  else if (mode_ == "TANH")
+  else if (mode_ == "tanh")
     cudnn_mode_ = CUDNN_ACTIVATION_TANH;
-  else if (mode_ == "RELU")
+  else if (mode_ == "relu")
     cudnn_mode_ = CUDNN_ACTIVATION_RELU;
   else
     LOG(FATAL) << "Unkown activation: " << mode_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_activation.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_activation.h b/src/model/layer/cudnn_activation.h
index 526e03f..c69d157 100644
--- a/src/model/layer/cudnn_activation.h
+++ b/src/model/layer/cudnn_activation.h
@@ -35,7 +35,7 @@ class CudnnActivation : public Activation {
  public:
   ~CudnnActivation();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnActivation"; }
+  // const std::string layer_type() const override { return "CudnnActivation"; }
 
   const Tensor Forward(int flag, const Tensor& input) override;
   const std::pair<Tensor, vector<Tensor>> Backward(int flag,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
index 461f1b6..01682b7 100644
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -23,7 +23,7 @@
 
 namespace singa {
 
-RegisterLayerClass(CudnnBatchNorm);
+RegisterLayerClass(cudnn_batchnorm, CudnnBatchNorm);
 CudnnBatchNorm::~CudnnBatchNorm() {
   if (has_init_cudnn_) {
     CUDNN_CHECK(cudnnDestroyTensorDescriptor(shape_desc_));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_batchnorm.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.h b/src/model/layer/cudnn_batchnorm.h
index 4f46452..c4390a1 100644
--- a/src/model/layer/cudnn_batchnorm.h
+++ b/src/model/layer/cudnn_batchnorm.h
@@ -31,7 +31,7 @@ class CudnnBatchNorm : public BatchNorm {
  public:
   ~CudnnBatchNorm();
   /// \copy doc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnBatchNorm"; }
+  // const std::string layer_type() const override { return "CudnnBatchNorm"; }
 
   void Setup(const Shape& in_sample, const LayerConf& conf) override;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc
index e5efec0..ffd2ab7 100644
--- a/src/model/layer/cudnn_convolution.cc
+++ b/src/model/layer/cudnn_convolution.cc
@@ -23,7 +23,7 @@
 #include "singa/utils/logging.h"
 
 namespace singa {
-RegisterLayerClass(CudnnConvolution);
+RegisterLayerClass(cudnn_convolution, CudnnConvolution);
 CudnnConvolution::~CudnnConvolution() {
   if (bias_desc_ != nullptr)
     CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc_));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_convolution.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.h b/src/model/layer/cudnn_convolution.h
index cd0471f..545fd5c 100644
--- a/src/model/layer/cudnn_convolution.h
+++ b/src/model/layer/cudnn_convolution.h
@@ -34,7 +34,7 @@ class CudnnConvolution : public Convolution {
  public:
   ~CudnnConvolution();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnConvolution"; }
+  // const std::string layer_type() const override { return "CudnnConvolution";}
 
   const Tensor Forward(int flag, const Tensor &input) override;
   const std::pair<Tensor, vector<Tensor>> Backward(int flag,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_dropout.cc b/src/model/layer/cudnn_dropout.cc
index e6950ca..c5b62cf 100644
--- a/src/model/layer/cudnn_dropout.cc
+++ b/src/model/layer/cudnn_dropout.cc
@@ -27,7 +27,7 @@
 #include "singa/utils/logging.h"
 
 namespace singa {
-RegisterLayerClass(CudnnDropout);
+RegisterLayerClass(cudnn_dropout, CudnnDropout);
 CudnnDropout::~CudnnDropout() {
   if (drop_desc_ != nullptr)
     CUDNN_CHECK(cudnnDestroyDropoutDescriptor(drop_desc_));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_dropout.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_dropout.h b/src/model/layer/cudnn_dropout.h
index 9e0cb9e..1241911 100644
--- a/src/model/layer/cudnn_dropout.h
+++ b/src/model/layer/cudnn_dropout.h
@@ -36,7 +36,7 @@ class CudnnDropout : public Dropout {
  public:
   ~CudnnDropout();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnDropout"; }
+  // const std::string layer_type() const override { return "CudnnDropout"; }
 
   const Tensor Forward(int flag, const Tensor& input) override;
   const std::pair<Tensor, vector<Tensor>> Backward(int flag,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_lrn.cc b/src/model/layer/cudnn_lrn.cc
index 540beb1..ac7645e 100644
--- a/src/model/layer/cudnn_lrn.cc
+++ b/src/model/layer/cudnn_lrn.cc
@@ -23,7 +23,7 @@
 #include "cudnn_utils.h"
 
 namespace singa {
-RegisterLayerClass(CudnnLRN);
+RegisterLayerClass(cudnn_lrn, CudnnLRN);
 CudnnLRN::~CudnnLRN() {
   if (has_init_cudnn_) {
     CUDNN_CHECK(cudnnDestroyLRNDescriptor(lrn_desc_));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_lrn.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_lrn.h b/src/model/layer/cudnn_lrn.h
index e2a5e54..c48571d 100644
--- a/src/model/layer/cudnn_lrn.h
+++ b/src/model/layer/cudnn_lrn.h
@@ -31,7 +31,7 @@ class CudnnLRN : public LRN {
  public:
   ~CudnnLRN();
   /// \copy doc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnLRN"; }
+  // const std::string layer_type() const override { return "CudnnLRN"; }
 
   const Tensor Forward(int flag, const Tensor& input) override;
   const std::pair<Tensor, vector<Tensor>> Backward(int flag,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_pooling.cc b/src/model/layer/cudnn_pooling.cc
index 984427c..895ce3c 100644
--- a/src/model/layer/cudnn_pooling.cc
+++ b/src/model/layer/cudnn_pooling.cc
@@ -25,7 +25,7 @@
 #include "singa/utils/logging.h"
 
 namespace singa {
-RegisterLayerClass(CudnnPooling);
+RegisterLayerClass(cudnn_pooling, CudnnPooling);
 CudnnPooling::~CudnnPooling() {
   if (pool_desc_ != nullptr)
     CUDNN_CHECK(cudnnDestroyPoolingDescriptor(pool_desc_));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_pooling.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_pooling.h b/src/model/layer/cudnn_pooling.h
index 90779f5..2080db3 100644
--- a/src/model/layer/cudnn_pooling.h
+++ b/src/model/layer/cudnn_pooling.h
@@ -35,7 +35,7 @@ class CudnnPooling : public Pooling {
  public:
   ~CudnnPooling();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnPooling"; }
+  // const std::string layer_type() const override { return "CudnnPooling"; }
 
   void Setup(const Shape& in_sample, const LayerConf &conf) override;
   const Tensor Forward(int flag, const Tensor &input) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_rnn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_rnn.cc b/src/model/layer/cudnn_rnn.cc
index bfbfa48..9961df2 100644
--- a/src/model/layer/cudnn_rnn.cc
+++ b/src/model/layer/cudnn_rnn.cc
@@ -24,6 +24,7 @@
 #include "singa/utils/logging.h"
 
 namespace singa {
+RegisterLayerClass(cudnn_rnn, CudnnRNN);
 CudnnRNN::~CudnnRNN() {
   if (weight_desc_ != nullptr)
     CUDNN_CHECK(cudnnDestroyFilterDescriptor(weight_desc_));
@@ -126,25 +127,19 @@ void CudnnRNN::SetRNNDescriptor(shared_ptr<Device> dev) {
       dropout_state_.block()->mutable_data(), state_size, seed_));
 
   CUDNN_CHECK(cudnnCreateRNNDescriptor(&rnn_desc_));
-  cudnnRNNInputMode_t input_mode;
-  if (input_mode_ == "linear")
-    input_mode = CUDNN_LINEAR_INPUT;
-  else if (input_mode_ == "skip")
+  cudnnRNNInputMode_t input_mode = CUDNN_LINEAR_INPUT;
+  if (input_mode_ == "skip")
     input_mode = CUDNN_SKIP_INPUT;
 
-  cudnnDirectionMode_t direction;
-  if (direction_ == "unidirectional")
-    direction = CUDNN_UNIDIRECTIONAL;
-  else if (direction_ == "bidirectional")
+  cudnnDirectionMode_t direction = CUDNN_UNIDIRECTIONAL;
+  if (direction_ == "bidirectional")
     direction = CUDNN_BIDIRECTIONAL;
 
-  cudnnRNNMode_t rnn_mode;
+  cudnnRNNMode_t rnn_mode = CUDNN_LSTM;
   if (rnn_mode_ == "relu")
     rnn_mode = CUDNN_RNN_RELU;
   else if (rnn_mode_ == "tanh")
     rnn_mode = CUDNN_RNN_TANH;
-  else if (rnn_mode_ == "lstm")
-    rnn_mode = CUDNN_LSTM;
   else if (rnn_mode_ == "gru")
     rnn_mode = CUDNN_GRU;
   CUDNN_CHECK(cudnnSetRNNDescriptor(rnn_desc_, hidden_size_, num_stacks_,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_rnn.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_rnn.h b/src/model/layer/cudnn_rnn.h
index cfb8aac..82c68b0 100644
--- a/src/model/layer/cudnn_rnn.h
+++ b/src/model/layer/cudnn_rnn.h
@@ -39,7 +39,7 @@ class CudnnRNN : public RNN {
  public:
   ~CudnnRNN();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnRNN"; }
+  // const std::string layer_type() const override { return "CudnnRNN"; }
 
   const vector<Tensor> Forward(int flag, const vector<Tensor>& inputs) override;
   const std::pair<vector<Tensor>, vector<Tensor>> Backward(

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_softmax.cc b/src/model/layer/cudnn_softmax.cc
index 6dce68f..f1a4a5b 100644
--- a/src/model/layer/cudnn_softmax.cc
+++ b/src/model/layer/cudnn_softmax.cc
@@ -23,7 +23,7 @@
 #include "singa/utils/logging.h"
 namespace singa {
 
-RegisterLayerClass(CudnnSoftmax);
+RegisterLayerClass(cudnn_softmax, CudnnSoftmax);
 CudnnSoftmax::~CudnnSoftmax() {
   if (desc_ != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(desc_));
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_softmax.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_softmax.h b/src/model/layer/cudnn_softmax.h
index aca3729..532a643 100644
--- a/src/model/layer/cudnn_softmax.h
+++ b/src/model/layer/cudnn_softmax.h
@@ -34,7 +34,7 @@ class CudnnSoftmax : public Softmax {
  public:
   ~CudnnSoftmax();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "CudnnSoftmax"; }
+  // const std::string layer_type() const override { return "CudnnSoftmax"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample_shape, const LayerConf &conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/cudnn_utils.h
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_utils.h b/src/model/layer/cudnn_utils.h
index 19c72ec..64ee758 100644
--- a/src/model/layer/cudnn_utils.h
+++ b/src/model/layer/cudnn_utils.h
@@ -26,7 +26,7 @@
 #include "singa/utils/logging.h"
 namespace singa {
 inline cudnnDataType_t GetCudnnDataType(DataType dtype) {
-  cudnnDataType_t ret;
+  cudnnDataType_t ret = CUDNN_DATA_FLOAT;
   switch (dtype) {
     case kFloat32:
       ret = CUDNN_DATA_FLOAT;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 557d8bd..1a2d16e 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -23,7 +23,7 @@
 namespace singa {
 using std::vector;
 
-RegisterLayerClass(Dense);
+RegisterLayerClass(singa_dense, Dense);
 Dense::~Dense() {
   // delete weight_;
   // delete bias_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
index bb5db66..8a149a5 100644
--- a/src/model/layer/dense.h
+++ b/src/model/layer/dense.h
@@ -28,7 +28,7 @@ class Dense : public Layer {
  public:
   ~Dense();
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Dense"; }
+  // const std::string layer_type() const override { return "Dense"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.cc b/src/model/layer/dropout.cc
index 0a4b1df..35801b4 100644
--- a/src/model/layer/dropout.cc
+++ b/src/model/layer/dropout.cc
@@ -20,7 +20,7 @@
 #include "./dropout.h"
 namespace singa {
 
-RegisterLayerClass(Dropout);
+RegisterLayerClass(singa_dropout, Dropout);
 void Dropout::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   dropout_ratio_ = conf.dropout_conf().dropout_ratio();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/dropout.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.h b/src/model/layer/dropout.h
index 1a4bdbf..711c86b 100644
--- a/src/model/layer/dropout.h
+++ b/src/model/layer/dropout.h
@@ -26,7 +26,7 @@ namespace singa {
 class Dropout : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Dropout"; }
+  // const std::string layer_type() const override { return "Dropout"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/flatten.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/flatten.cc b/src/model/layer/flatten.cc
index e7d8fa0..d89361e 100644
--- a/src/model/layer/flatten.cc
+++ b/src/model/layer/flatten.cc
@@ -20,7 +20,7 @@
 #include "./flatten.h"
 namespace singa {
 
-RegisterLayerClass(Flatten);
+RegisterLayerClass(singa_flatten, Flatten);
 void Flatten::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   axis_ = conf.flatten_conf().axis();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/flatten.h
----------------------------------------------------------------------
diff --git a/src/model/layer/flatten.h b/src/model/layer/flatten.h
index 6ac90c2..8bbf481 100644
--- a/src/model/layer/flatten.h
+++ b/src/model/layer/flatten.h
@@ -26,7 +26,7 @@ namespace singa {
 class Flatten : public Layer {
  public:
   /// \copydoc Layer::layer_type();
-  const std::string layer_type() const override { return "Flatten"; }
+  // const std::string layer_type() const override { return "Flatten"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/lrn.cc b/src/model/layer/lrn.cc
index a624147..6b5a618 100644
--- a/src/model/layer/lrn.cc
+++ b/src/model/layer/lrn.cc
@@ -22,7 +22,7 @@
 #include <vector>
 
 namespace singa {
-RegisterLayerClass(LRN);
+RegisterLayerClass(singa_lrn, LRN);
 void LRN::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/lrn.h
----------------------------------------------------------------------
diff --git a/src/model/layer/lrn.h b/src/model/layer/lrn.h
index 0632f8c..57e26ba 100644
--- a/src/model/layer/lrn.h
+++ b/src/model/layer/lrn.h
@@ -27,9 +27,7 @@ namespace singa {
 class LRN : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override {
-    return "LRN";
-  }
+  // const std::string layer_type() const override { return "LRN"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index 943f9b2..5e7ba1d 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -20,7 +20,7 @@
 #include "singa/model/layer.h"
 namespace singa {
 
-RegisterLayerClass(Pooling);
+RegisterLayerClass(singa_pooling, Pooling);
 void Pooling::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   PoolingConf pool_conf = conf.pooling_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/pooling.h
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.h b/src/model/layer/pooling.h
index 6df292a..f844799 100644
--- a/src/model/layer/pooling.h
+++ b/src/model/layer/pooling.h
@@ -28,7 +28,7 @@ namespace singa {
 class Pooling : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Pooling"; }
+  // const std::string layer_type() const override { return "Pooling"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/prelu.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/prelu.cc b/src/model/layer/prelu.cc
index 421bcaa..a20972c 100644
--- a/src/model/layer/prelu.cc
+++ b/src/model/layer/prelu.cc
@@ -20,7 +20,7 @@
 #include "./prelu.h"
 namespace singa {
 
-RegisterLayerClass(PReLU);
+RegisterLayerClass(singa_prelu, PReLU);
 void PReLU::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;
@@ -82,7 +82,7 @@ const std::pair<Tensor, vector<Tensor> > PReLU::Backward(int flag,
   Tensor da;
   da.ResetLike(a_);
   if (!channel_shared_) {
-    size_t n, c, h, w;
+    size_t n = 0, c = 0, h = 0, w = 0;
     Tensor temp1 = (input <= 0.f);
     if (temp1.nDim() == 4) {
       if (format_ == "NCHW") {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/prelu.h
----------------------------------------------------------------------
diff --git a/src/model/layer/prelu.h b/src/model/layer/prelu.h
index 70a9dcf..3041d1e 100644
--- a/src/model/layer/prelu.h
+++ b/src/model/layer/prelu.h
@@ -27,7 +27,7 @@ namespace singa {
 class PReLU : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "PReLU"; }
+  //  const std::string layer_type() const override { return "PReLU"; }
 
 
   /// \copydoc Layer::Setup(const LayerConf&);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/rnn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/rnn.cc b/src/model/layer/rnn.cc
index 424c20b..524b462 100644
--- a/src/model/layer/rnn.cc
+++ b/src/model/layer/rnn.cc
@@ -22,7 +22,7 @@
 #include "singa/utils/string.h"
 
 namespace singa {
-
+RegisterLayerClass(singa_rnn, RNN);
 void RNN::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/rnn.h
----------------------------------------------------------------------
diff --git a/src/model/layer/rnn.h b/src/model/layer/rnn.h
index 1b5dad7..3369a00 100644
--- a/src/model/layer/rnn.h
+++ b/src/model/layer/rnn.h
@@ -35,7 +35,7 @@ namespace singa {
 class RNN : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "RNN"; }
+  // const std::string layer_type() const override { return "RNN"; }
 
   /// Setup the RNN layer.
   /// in_shape is the shape of a single training instance from one timestep,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 6b1785c..6a49131 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -19,7 +19,7 @@
 #include "./softmax.h"
 namespace singa {
 
-RegisterLayerClass(Softmax);
+RegisterLayerClass(singa_softmax, Softmax);
 void Softmax::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   CHECK_EQ(in_sample.size(), 1u);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/model/layer/softmax.h
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.h b/src/model/layer/softmax.h
index 837b23a..cf71587 100644
--- a/src/model/layer/softmax.h
+++ b/src/model/layer/softmax.h
@@ -24,7 +24,7 @@ namespace singa {
 class Softmax : public Layer {
  public:
   /// \copydoc Layer::layer_type()
-  const std::string layer_type() const override { return "Softmax"; }
+  // const std::string layer_type() const override { return "Softmax"; }
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const Shape& in_sample, const LayerConf& conf) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05720c21/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index 3db90bf..aff3587 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -73,3 +73,16 @@ def create_cuda_gpus(num):
 
 def create_cuda_gpu():
     return singa.Platform.CreateCudaGPUs(1)[0]
+
+
+def create_cuda_gpus_on(device_ids):
+    return singa.Platform.CreateCudaGPUsOn(device_ids)
+
+
+def create_cuda_gpu_on(device_id):
+    devices = create_cuda_gpus_on([device_id])
+    return devices[0]
+
+
+def get_default_device():
+    return singa.Platform.GetDefaultDevice()

[41/51] [abbrv] incubator-singa git commit: SINGA-227 Add Split and Merge Layer and add ResNet Implementation

Posted by wa...@apache.org.

SINGA-227 Add Split and Merge Layer and add ResNet Implementation

Add python resnet implementation and add Split and Merge Layer.

Discard split and merge layer in resnet implementation.

Add add_split and add_merge function where multiple inputs or outputs involved when creating net.

change comments in resnet.py


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/7ebea537
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/7ebea537
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/7ebea537

Branch: refs/heads/master
Commit: 7ebea537edfdd2b82e9aa2c8596033e0b2cab337
Parents: cdc5ffd
Author: jixin <ji...@comp.nus.edu.sg>
Authored: Wed Aug 10 23:29:40 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 17 11:37:59 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/resnet.py | 350 ++++++++++++++++++++++++++++++++++++++++
 examples/cifar10/train.py  |  20 ++-
 src/model/layer/merge.cc   |  62 +++++++
 src/model/layer/merge.h    |  52 ++++++
 src/model/layer/split.cc   |  54 +++++++
 src/model/layer/split.h    |  52 ++++++
 src/proto/model.proto      |  12 ++
 7 files changed, 597 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/examples/cifar10/resnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/resnet.py b/examples/cifar10/resnet.py
new file mode 100644
index 0000000..c9b3e2b
--- /dev/null
+++ b/examples/cifar10/resnet.py
@@ -0,0 +1,350 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" The resnet model is adapted from http://torch.ch/blog/2016/02/04/resnets.html
+The best validation accuracy we achieved is about 83% without data augmentation.
+The performance could be improved by tuning some hyper-parameters, including
+learning rate, weight decay, max_epoch, parameter initialization, etc.
+"""
+
+import sys
+import os
+import math
+import cPickle as pickle
+
+#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# use the python modules by installing py singa in build/python
+# pip install -e .
+
+from singa import tensor
+from singa import layer
+from singa import initializer
+from singa import metric
+from singa import loss
+from singa import net as ffnet
+from singa.proto.model_pb2 import kTrain, kEval
+
+class ResNet(object):
+
+    def __init__(self, loss=None, metric=None):
+        self.loss = loss
+        self.metric = metric
+        self.layers = []
+        self.src_layers = {}
+        self.dst_layers = {}
+        self.layer_shapes = {}
+        self.layer_names = []
+
+    def to_device(self, dev):
+        for lyr in self.layers:
+            lyr.to_device(dev)
+
+    def find(self, name):
+        for i in xrange(len(self.layers)):
+            if self.layers[i].name == name:
+                return self.layers[i]
+        assert False, "Undefined layer %s." % name
+        return None
+
+    def add(self, lyr, src_lyr_name=''):
+        """Append a layer into the layer list.
+        This function will get the sample shape from the last layer to setup
+        the newly added layer. For the first layer, it is setup outside.
+        The calling function should ensure the correctness of the layer order.
+        Args:
+            lyr (Layer): the layer to be added
+            src_lyr_name: list type, name of the src layer to the current layer
+        """
+        if len(self.layers) > 0 and lyr.has_setup is False:
+            #assert src_lyr_name in dst_layers, "Undefined src layer %s" % src_lyr_name
+            shape = self.layer_shapes[src_lyr_name]
+            lyr.setup(shape)
+        print lyr.name, ': ', lyr.get_output_sample_shape()
+        if src_lyr_name != '':
+            self.src_layers[lyr.name] = [src_lyr_name]
+        self.layers.append(lyr)
+        self.layer_shapes[lyr.name] = lyr.get_output_sample_shape()            
+        self.layer_names.append(lyr.name)
+
+        if src_lyr_name != '':
+            if src_lyr_name in self.dst_layers:
+                self.dst_layers[src_lyr_name].append(lyr.name)
+            else:
+                self.dst_layers[src_lyr_name] = [lyr.name]
+        if lyr.name in self.src_layers:
+            print 'src: ', self.src_layers[lyr.name]
+        else:
+            print 'src: null'
+        #print self.layer_names
+        print "----------------------------------------"
+
+    def add_split(self, lyr_name, src_lyr_name):
+        assert src_lyr_name in self.layer_shapes, "Undefined src layer %s." % src_lyr_name
+        self.src_layers[lyr_name] = [src_lyr_name]
+        self.layer_shapes[lyr_name] = self.layer_shapes[src_lyr_name]
+        self.layer_names.append(lyr_name)
+        if src_lyr_name in self.dst_layers:
+            self.dst_layers[src_lyr_name].append(lyr_name)
+        else:
+            self.dst_layers[src_lyr_name] = [lyr_name]
+        print lyr_name, ': ', self.layer_shapes[lyr_name]
+        if lyr_name in self.src_layers:
+            print 'src: ', self.src_layers[lyr_name]
+        else:
+            print 'src: null'
+        print "----------------------------------------"
+   
+    def add_merge(self, lyr_name, src_lyr_names):
+        self.src_layers[lyr_name] = src_lyr_names
+        self.layer_shapes[lyr_name] = self.layer_shapes[src_lyr_names[0]]
+        self.layer_names.append(lyr_name)
+        for i in xrange(len(src_lyr_names)):
+            if src_lyr_names[i] in self.dst_layers:
+                self.dst_layers[src_lyr_names[i]].append(lyr_name)
+            else:
+                self.dst_layers[src_lyr_names[i]] = [lyr_name]
+        print lyr_name, ': ', self.layer_shapes[lyr_name]
+        if lyr_name in self.src_layers:
+            print 'src: ', self.src_layers[lyr_name]
+        else:
+            print 'src: null'
+        print "----------------------------------------"
+
+    def param_values(self):
+        values = []
+        for lyr in self.layers:
+            values.extend(lyr.param_values())
+        return values
+
+    def param_specs(self):
+        specs = []
+        for lyr in self.layers:
+            specs.extend(lyr.param_specs)
+        return specs
+
+    def param_names(self):
+        return [spec.name for spec in self.param_specs()]
+
+    def train(self, x, y):
+        out = self.forward(kTrain, x)
+        l = self.loss.forward(kTrain, out, y)
+        if self.metric is not None:
+            m = self.metric.evaluate(out, y)
+        return self.backward(), (l.l1(), m)
+
+    def evaluate(self, x, y):
+        """Evaluate the loss and metric of the given data"""
+        out = self.forward(kEval, x)
+        l = None
+        m = None
+        assert self.loss is not None or self.metric is not None,\
+            'Cannot do evaluation, as neither loss nor metic is set'
+        if self.loss is not None:
+            l = self.loss.evaluate(kEval, out, y)
+        if self.metric is not None:
+            m = self.metric.evaluate(out, y)
+        return l, m
+
+    def predict(self, x):
+        xx = self.forward(kEval, x)
+        return tensor.softmax(xx)
+
+    def forward(self, flag, x):
+        #print x.l1()
+        outputs = {'': x}
+        for idx, name in enumerate(self.layer_names):
+            #print 'forward layer', name
+            if idx == 0:
+                outputs[name] = self.find(name).forward(flag, outputs[''])
+                del outputs['']
+                continue
+
+            if 'split' in name:
+                src = self.src_layers[name][0]
+                #print 'src: ', src
+                outputs[name] = []
+                for i in xrange(len(self.dst_layers[name])):
+                    outputs[name].append(outputs[src])
+                del outputs[src]
+            elif 'merge' in name:
+                srcs = self.src_layers[name]
+                #print 'src: ', srcs
+                for i in xrange(len(srcs)):
+                    if 'split' in srcs[i]:
+                       if i > 0:
+                            data += outputs[srcs[i]][0]
+                       else:
+                            data = outputs[srcs[i]][0]
+                       del outputs[srcs[i]][0]
+                       if len(outputs[srcs[i]]) == 0:
+                           del outputs[srcs[i]]
+                    else:
+                        if i > 0:
+                            data += outputs[srcs[i]]
+                        else:
+                            data = outputs[srcs[i]]
+                        del outputs[srcs[i]]
+                outputs[name] = data
+            else:
+                src = self.src_layers[name][0]
+                #print 'src: ', src
+                if 'split' in src:
+                    outputs[name] = self.find(name).forward(flag, outputs[src][0])
+                    del outputs[src][0]
+                    if len(outputs[src]) == 0:
+                        del outputs[src]
+                else:
+                    outputs[name] = self.find(name).forward(flag, outputs[src])
+                    del outputs[src]
+                
+        #    print lyr.name, x.l1()
+        return outputs[name]
+
+    def backward(self, flag=kTrain):
+        grad = self.loss.backward()
+        pgrads = []
+        in_grads = {'': grad}
+        for idx, name in enumerate(reversed(self.layer_names)):
+            #print 'backward layer', name
+            if idx == 0:
+                lyr = self.find(name)
+                grad, _pgrads = lyr.backward(flag, in_grads[''])
+                for g in reversed(_pgrads):
+                    pgrads.append(g)
+                in_grads[name] = grad
+                del in_grads['']
+                continue
+
+            if 'merge' in name:
+                src = self.dst_layers[name][0]
+                #print 'src: ', src
+                in_grads[name] = []
+                for i in xrange(len(self.src_layers[name])):
+                    in_grads[name].append(in_grads[src])
+                del in_grads[src]
+            elif 'split' in name:
+                srcs = self.dst_layers[name]
+                #print 'src: ', srcs
+                for i in xrange(len(srcs)):
+                    if 'merge' in srcs[i]:
+                       if i > 0:
+                            data += in_grads[srcs[i]][0]
+                       else:
+                            data = in_grads[srcs[i]][0]
+                       del in_grads[srcs[i]][0]
+                       if len(in_grads[srcs[i]]) == 0:
+                           del in_grads[srcs[i]]
+                    else:
+                        if i > 0:
+                            data += in_grads[srcs[i]]
+                        else:
+                            data = in_grads[srcs[i]]
+                        del in_grads[srcs[i]]
+                in_grads[name] = data
+            else:
+                src = self.dst_layers[name][0]
+                #print 'src: ', src
+                if 'merge' in src:
+                    grad, _pgrads = self.find(name).backward(flag, in_grads[src][0])
+                    del in_grads[src][0]
+                    if len(in_grads[src]) == 0:
+                        del in_grads[src]
+                else:
+                    grad, _pgrads = self.find(name).backward(flag, in_grads[src])
+                    del in_grads[src]
+                for g in reversed(_pgrads):
+                    pgrads.append(g)
+                in_grads[name] = grad
+
+
+        return reversed(pgrads)
+
+    def save(self, f):
+        """Save model parameters using cpickle"""
+        params = {}
+        for (specs, val) in zip(self.param_specs(), self.param_values()):
+            val.to_host()
+            params[specs.name] = tensor.to_numpy(val)
+        with open(f, 'wb') as fd:
+            pickle.dump(params, fd)
+
+    def load(self, f):
+        """Load model parameters using cpickle"""
+        with open(f, 'rb') as fd:
+            params = pickle.load(fd)
+        for (specs, val) in zip(self.param_specs(), self.param_values()):
+            val.copy_from_numpy(params[specs.name])
+
+def Block(net, name, nb_filters, stride, std, src):
+    #net.add(layer.Split("split" + name, 2), srcs)
+    net.add_split("split" + name, src)
+    if stride > 1:
+        net.add(layer.Conv2D("conv" + name + "_br1", nb_filters, 1, stride, pad=0), "split" + name)
+        net.add(layer.BatchNormalization("bn" + name + "_br1"), "conv" + name + "_br1")
+        net.add(layer.Conv2D("conv" + name + "_br2a", nb_filters, 3, stride, pad=1), "split" + name)
+    else:
+        net.add(layer.Conv2D("conv" + name + "_br2a", nb_filters, 3, stride, pad=1), "split" + name)
+    net.add(layer.BatchNormalization("bn" + name + "_br2a"), "conv" + name + "_br2a")
+    net.add(layer.Activation("relu" + name + "_br2a"), "bn" + name + "_br2a")
+    net.add(layer.Conv2D("conv" + name + "_br2b", nb_filters, 3, 1, pad=1), "relu" + name + "_br2a")
+    net.add(layer.BatchNormalization("bn" + name + "_br2b"), "conv" + name + "_br2b")
+    if stride > 1:
+        net.add_merge("merge" + name, ["bn" + name + "_br1", "bn" + name + "_br2b"])
+    else:
+        net.add_merge("merge" + name, ["split" + name, "bn" + name + "_br2b"])
+
+def create_net():
+    net = ResNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
+    net.add(layer.Conv2D("conv1", 16, 3, 1, pad=1, input_sample_shape=(3, 32, 32)))
+    net.add(layer.BatchNormalization("bn1"), "conv1")
+    net.add(layer.Activation("relu1"), "bn1")
+   
+    Block(net, "2a", 16, 1, 0.01, "relu1")
+    Block(net, "2b", 16, 1, 0.01, "merge2a")
+    Block(net, "2c", 16, 1, 0.01, "merge2b")
+
+    Block(net, "3a", 32, 2, 0.01, "merge2c")
+    Block(net, "3b", 32, 1, 0.01, "merge3a")
+    Block(net, "3c", 32, 1, 0.01, "merge3b")
+
+    Block(net, "4a", 64, 2, 0.01, "merge3c")
+    Block(net, "4b", 64, 1, 0.01, "merge4a")
+    Block(net, "4c", 64, 1, 0.01, "merge4b")
+
+    net.add(layer.AvgPooling2D("pool4", 8, 8, border_mode='valid'), "merge4c")
+    net.add(layer.Flatten('flat'), "pool4")
+    net.add(layer.Dense('ip5', 10), "flat")
+    net.add(layer.Softmax('softmax'), "ip5")
+    print 'Start intialization............'
+    for (p, name) in zip(net.param_values(), net.param_names()):
+        print name, p.shape
+        if 'mean' in name or 'beta' in name:
+            p.set_value(0.0)
+        elif 'var' in name:
+            p.set_value(1.0)
+        elif 'gamma' in name:
+            initializer.uniform(p, 0, 1)
+        elif len(p.shape) > 1:
+            if 'conv' in name:
+                #initializer.gaussian(p, 0, math.sqrt(2.0/p.shape[1]))
+                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0*p.shape[0])))
+            else:
+                initializer.gaussian(p, 0, 0.02)
+        else:
+            p.set_value(0)
+        print name, p.l1()
+
+    return net

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 8f596e5..6b7631e 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -33,7 +33,7 @@ from singa.proto import core_pb2
 
 import alexnet
 import vgg
-
+import resnet
 
 def load_dataset(filepath):
     print 'Loading data file %s' % filepath
@@ -94,6 +94,13 @@ def alexnet_lr(epoch):
     else:
         return 0.00001
 
+def resnet_lr(epoch):
+    if epoch < 80:
+        return 0.02
+    elif epoch < 120:
+        return 0.005
+    else:
+        return 0.001
 
 def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
           use_cpu=False):
@@ -152,9 +159,8 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
     net.save('model.bin')  # save model params into checkpoint file
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Train vgg/alexnet for '
-                                     'cifar10 dataset')
-    parser.add_argument('model', choices=['vgg', 'alexnet'], default='alexnet')
+    parser = argparse.ArgumentParser(description='Train vgg/alexnet for cifar10')
+    parser.add_argument('model', choices=['vgg', 'alexnet', 'resnet'], default='alexnet')
     parser.add_argument('data', default='cifar-10-batches-py')
     parser.add_argument('--use_cpu', action='store_true')
     args = parser.parse_args()
@@ -168,8 +174,12 @@ if __name__ == '__main__':
         net = alexnet.create_net(args.use_cpu)
         train((train_x, train_y, test_x, test_y), net, 160, alexnet_lr, 0.004,
               use_cpu=args.use_cpu)
-    else:
+    elif args.model == 'vgg':
         train_x, test_x = normalize_for_vgg(train_x, test_x)
         net = vgg.create_net(args.use_cpu)
         train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005,
               use_cpu=args.use_cpu)
+    else:
+        train_x, test_x = normalize_for_vgg(train_x, test_x)
+        net = resnet.create_net()
+        train((train_x, train_y, test_x, test_y), net, 200, resnet_lr, 1e-4)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/src/model/layer/merge.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/merge.cc b/src/model/layer/merge.cc
new file mode 100644
index 0000000..a30c3b3
--- /dev/null
+++ b/src/model/layer/merge.cc
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/model/layer.h"
+#include "./merge.h"
+namespace singa {
+
+RegisterLayerClass(singa_merge, Merge);
+
+void Merge::Setup(const Shape& in_sample, const LayerConf& conf) {
+  Layer::Setup(in_sample, conf);
+  MergeConf merge_conf = conf.merge_conf();
+  input_size_ = merge_conf.input_size();
+  out_sample_shape_ = in_sample;
+}
+
+const vector<Tensor> Merge::Forward(int flag, const vector<Tensor>& inputs) {
+  vector<Tensor> outputs;
+  //input_size_ = inputs.size();
+  if (input_size_ == 1u) {
+    outputs = inputs;
+  } else {
+    Tensor sum = inputs.at(0);
+    for (size_t i = 1; i < inputs.size(); i++) {
+      Tensor temp = inputs.at(i);
+      CHECK_EQ(sum.nDim(), temp.nDim());
+      for (size_t j = 0; j < temp.nDim(); j++)
+        CHECK_EQ(sum.shape(j), temp.shape(j));
+      sum += temp;
+    }
+    outputs.push_back(sum);
+  }
+  return outputs;
+}
+
+const std::pair<vector<Tensor>, vector<Tensor>> Merge::Backward(
+    int flag, const vector<Tensor>& grads) {
+  vector<Tensor> input_grad, param_grad;
+  if (grads.size() != 1u) {
+    LOG(INFO) << "Merge layer only have one output tensor.";
+  }
+  for (size_t i = 0; i < input_size_; i++)
+    input_grad.push_back(grads.at(0));
+  return std::make_pair(input_grad, param_grad);
+}
+
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/src/model/layer/merge.h
----------------------------------------------------------------------
diff --git a/src/model/layer/merge.h b/src/model/layer/merge.h
new file mode 100644
index 0000000..9c34192
--- /dev/null
+++ b/src/model/layer/merge.h
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_LAYER_MERGE_H_
+#define SRC_MODEL_LAYER_MERGE_H_
+#include <string>
+#include <utility>
+#include <vector>
+#include "singa/model/layer.h"
+
+namespace singa {
+class Merge : public Layer {
+ public:
+  /// \copydoc Layer::layer_type()
+  const std::string layer_type() const override { return "Merge"; }
+
+  /// \copydoc Layer::Setup(const LayerConf&);
+  void Setup(const Shape& in_sample, const LayerConf& conf) override;
+  const Shape GetOutputSampleShape() const override {
+    CHECK(out_sample_shape_.size()) << "You may haven't call Setup()";
+    return out_sample_shape_;
+  }
+  /// \copydoc Layer::Forward(int flag, const vector<Tensor>&)
+  const vector<Tensor> Forward(int flag, const vector<Tensor>& inputs) override;
+
+  /// \copydoc Layer::Backward(int, const vector<Tensor>&);
+  const std::pair<vector<Tensor>, vector<Tensor>> Backward(int flag,
+                                                   const vector<Tensor>& grads) override;
+
+  const size_t input_size() const { return input_size_; }
+
+ protected:
+  // To store the input and output(of forward) tensors
+  Shape out_sample_shape_;
+  size_t input_size_;
+};
+}  // namespace singa
+#endif  // SRC_MODEL_LAYER_MERGE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/src/model/layer/split.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/split.cc b/src/model/layer/split.cc
new file mode 100644
index 0000000..fd1ab7d
--- /dev/null
+++ b/src/model/layer/split.cc
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/model/layer.h"
+#include "./split.h"
+namespace singa {
+
+RegisterLayerClass(singa_split, Split);
+
+void Split::Setup(const Shape& in_sample, const LayerConf& conf) {
+  Layer::Setup(in_sample, conf);
+  SplitConf split_conf = conf.split_conf();
+  output_size_ = split_conf.output_size();
+  out_sample_shape_ = in_sample;
+}
+
+const vector<Tensor> Split::Forward(int flag, const vector<Tensor>& inputs) {
+  vector<Tensor> outputs;
+  if (inputs.size() != 1)
+    LOG(FATAL) << "Split layer only have one input tensor.";
+  for (size_t i = 0; i < output_size_; i++)
+    outputs.push_back(inputs.at(0));
+  return outputs;
+}
+
+const std::pair<vector<Tensor>, vector<Tensor>> Split::Backward(
+    int flag, const vector<Tensor>& grads) {
+  vector<Tensor> input_grad, param_grad;
+  CHECK_EQ(grads.size(), output_size_);
+  
+  /// Input_grad is the sum of all the output gradients.
+  Tensor temp = grads.at(0);
+  for (size_t i = 1; i < output_size_; i++)
+    temp += grads.at(i);
+  input_grad.push_back(temp);
+  return std::make_pair(input_grad, param_grad);
+}
+
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/src/model/layer/split.h
----------------------------------------------------------------------
diff --git a/src/model/layer/split.h b/src/model/layer/split.h
new file mode 100644
index 0000000..79e70f6
--- /dev/null
+++ b/src/model/layer/split.h
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_LAYER_SPLIT_H_
+#define SRC_MODEL_LAYER_SPLIT_H_
+#include <string>
+#include <utility>
+#include <vector>
+#include "singa/model/layer.h"
+
+namespace singa {
+class Split : public Layer {
+ public:
+  /// \copydoc Layer::layer_type()
+  const std::string layer_type() const override { return "Split"; }
+
+  /// \copydoc Layer::Setup(const LayerConf&);
+  void Setup(const Shape& in_sample, const LayerConf& conf) override;
+  const Shape GetOutputSampleShape() const override {
+    CHECK(out_sample_shape_.size()) << "You may haven't call Setup()";
+    return out_sample_shape_;
+  }
+  /// \copydoc Layer::Forward(int flag, const vector<Tensor>&)
+  const vector<Tensor> Forward(int flag, const vector<Tensor>& inputs) override;
+
+  /// \copydoc Layer::Backward(int, const vector<Tensor>&);
+  const std::pair<vector<Tensor>, vector<Tensor>> Backward(int flag,
+                                                   const vector<Tensor>& grads) override;
+
+  const size_t output_size() const { return output_size_; }
+
+ protected:
+  // To store the input and output(of forward) tensors
+  Shape out_sample_shape_;
+  size_t output_size_;
+};
+}  // namespace singa
+#endif  // SRC_MODEL_LAYER_SPLIT_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7ebea537/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 6923820..1796e9c 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -241,6 +241,8 @@ message LayerConf {
   optional DenseConf dense_conf = 201;
   optional MetricConf metric_conf = 200;
   optional BatchNormConf batchnorm_conf = 202;
+  optional SplitConf split_conf = 203;
+  optional MergeConf merge_conf = 204;
 }
 
 // Message that stores hyper-parameters used to apply transformation
@@ -948,3 +950,13 @@ message BatchNormConf {
   // newMean*factor + runningMean*(1-factor).
   optional double factor = 1 [default = 0.9];
 }
+
+message SplitConf {
+  // Indicate the number of outputs
+  optional int32 output_size = 1 [default = 2];
+}
+
+message MergeConf {
+  // Indicate the number of outputs
+  optional int32 input_size = 1 [default = 2];
+}

[37/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Reorganize documentation files for generating html files

To generate html files:
In SINGA_ROOT/doc/, execute:
./build.sh html

To clean already generated html files:
In SINGA_ROOT/doc/, execute:
./build.sh clean


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a144a610
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a144a610
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a144a610

Branch: refs/heads/master
Commit: a144a610cb307a051a6e1f7b6cc08bd9baeb0137
Parents: 0a76425
Author: kaiping <ka...@comp.nus.edu.sg>
Authored: Tue Aug 16 14:30:10 2016 +0800
Committer: kaiping <ka...@comp.nus.edu.sg>
Committed: Tue Aug 16 14:30:10 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                    |  35 ----
 doc/_static/images/overview.png | Bin 0 -> 76106 bytes
 doc/_static/images/sgd.png      | Bin 0 -> 15553 bytes
 doc/_static/images/singa.png    | Bin 0 -> 203695 bytes
 doc/_templates/layout.html      |  52 ++++++
 doc/build.sh                    |  21 +++
 doc/conf.py                     | 339 +++++++++++++++++++++++++++++++++++
 doc/en/conf.py                  | 339 -----------------------------------
 doc/en/develop/schedule.rst     |   5 +-
 doc/en/docs/installation.md     |   2 +-
 doc/image/overview.png          | Bin 76106 -> 0 bytes
 doc/image/sgd.png               | Bin 15553 -> 0 bytes
 doc/image/singa.png             | Bin 203695 -> 0 bytes
 doc/zh/_templates/layout.html   |  61 -------
 doc/zh/conf.py                  | 339 -----------------------------------
 doc/zh/index.md                 |   9 -
 doc/zh/index.rst                |   9 +
 17 files changed, 424 insertions(+), 787 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
deleted file mode 100644
index f02595b..0000000
--- a/doc/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = sphinx-build
-PAPER         =
-BUILDDIR      = _build
-
-# Internal variables.
-PAPEROPT_a4     = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help
-help:
-	@echo "Please use \`make <target>' where <target> is one of"
-	@echo "  html       to make standalone HTML files"
-
-.PHONY: clean
-clean:
-	rm -rf $(BUILDDIR)/*
-	rm -rf en/docs/examples
-
-.PHONY: html
-html:
-	cp -rf ../examples en/docs/
-	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html/en
-	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/_static/images/overview.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/overview.png b/doc/_static/images/overview.png
new file mode 100644
index 0000000..a3244b3
Binary files /dev/null and b/doc/_static/images/overview.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/_static/images/sgd.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/sgd.png b/doc/_static/images/sgd.png
new file mode 100644
index 0000000..a0ec66f
Binary files /dev/null and b/doc/_static/images/sgd.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/_static/images/singa.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singa.png b/doc/_static/images/singa.png
new file mode 100644
index 0000000..d9ce10f
Binary files /dev/null and b/doc/_static/images/singa.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
new file mode 100755
index 0000000..1c10c5b
--- /dev/null
+++ b/doc/_templates/layout.html
@@ -0,0 +1,52 @@
+{#
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+#}
+{% extends "!layout.html" %}
+
+{% block extrahead %}
+    <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
+{% endblock %}
+     
+{% block footer %}
+
+<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
+<a href="http://incubator.apache.org/">
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+</a>
+ 
+  <span class="rst-current-version" data-toggle="rst-current-version">
+    <span class="fa fa-book"> incubator-singa </span>
+    v: {{ version }}
+    <span class="fa fa-caret-down"></span>
+  </span>
+    <div class="rst-other-versions">
+        <dl>
+            <dt>Languages</dt>
+            <dd><a href="{{ pathto('../en/index.html', 1) }}">English</a></dd>
+            <dd><a href="{{ pathto('../zh/index.html', 1) }}">\u4e2d\u6587</a></dd>
+        </dl>
+    </div>
+</div>
+
+ <a href="https://github.com/apache/incubator-singa">
+    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
+        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
+        alt="Fork me on GitHub">
+</a>
+
+{{ super() }}
+{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/build.sh
----------------------------------------------------------------------
diff --git a/doc/build.sh b/doc/build.sh
new file mode 100755
index 0000000..db987d6
--- /dev/null
+++ b/doc/build.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+SPHINXBUILD="sphinx-build"
+BUILDDIR="_build"
+LANG_ARR=(en zh)
+
+if [ "$1"x = "clean"x ]; then
+	rm -rf $BUILDDIR/*
+	rm -rf en/docs/examples
+	echo "clean up $BUILDDIR"
+fi
+
+
+if [ "$1"x = "html"x ]; then
+	cp -rf ../examples en/docs/
+	for (( i=0; i<${#LANG_ARR[@]}; i++)) do
+		echo "building language ${LANG_ARR[i]} ..."
+		$SPHINXBUILD -b html -c . -d $BUILDDIR/doctree ${LANG_ARR[i]} $BUILDDIR/html/${LANG_ARR[i]}
+	done
+	echo "<script language=\"javascript\" type=\"text/javascript\">window.location.href='en/index.html';</script>" > $BUILDDIR/html/index.html
+fi

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100755
index 0000000..86dc031
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+#
+# incubator-singa documentation build configuration file, created by
+# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration ------------------------------------------------
+from recommonmark.parser import CommonMarkParser
+
+source_parsers = {
+    '.md': CommonMarkParser,
+}
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+   
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ['.rst', '.md']
+
+# The encoding of source files.
+#
+# source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'incubator-singa'
+copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
+author = u'moaz'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'1.0.0'
+# The full version, including alpha/beta/rc tags.
+release = u'1.0.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#
+# today = ''
+#
+# Else, today_fmt is used as the format for a strftime call.
+#
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+# keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+# html_theme_path = []
+
+# The name for this set of Sphinx documents.
+# "<project> v<release> documentation" by default.
+#
+# html_title = u'Singa v1.0.0'
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#
+html_logo = '/singa.png'
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#
+# html_extra_path = []
+
+# If not None, a 'Last updated on:' timestamp is inserted at every page
+# bottom, using the given strftime format.
+# The empty string is equivalent to '%b %d, %Y'.
+#
+# html_last_updated_fmt = None
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+#
+# html_domain_indices = True
+
+# If false, no index is generated.
+#
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#
+html_show_sourcelink = False
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
+#
+# html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# 'ja' uses this config value.
+# 'zh' user can custom change `jieba` dictionary path.
+#
+# html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#
+# html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Singadoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+     # The paper size ('letterpaper' or 'a4paper').
+     #
+     # 'papersize': 'letterpaper',
+
+     # The font size ('10pt', '11pt' or '12pt').
+     #
+     # 'pointsize': '10pt',
+
+     # Additional stuff for the LaTeX preamble.
+     #
+     # 'preamble': '',
+
+     # Latex figure (float) alignment
+     #
+     # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
+     u'moaz', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+#
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#
+# latex_appendices = []
+
+# If false, no module index is generated.
+#
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     author, 'incubator-singa', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+#
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#
+# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/en/conf.py
----------------------------------------------------------------------
diff --git a/doc/en/conf.py b/doc/en/conf.py
deleted file mode 100755
index 46a48f6..0000000
--- a/doc/en/conf.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# incubator-singa documentation build configuration file, created by
-# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../../build/python'))
-
-# -- General configuration ------------------------------------------------
-from recommonmark.parser import CommonMarkParser
-
-source_parsers = {
-    '.md': CommonMarkParser,
-}
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
-napoleon_google_docstring = True
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = ['.rst', '.md']
-
-# The encoding of source files.
-#
-source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'incubator-singa'
-copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
-author = u'moaz'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = u'1.0.0'
-# The full version, including alpha/beta/rc tags.
-release = u'1.0.0'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#
-# today = ''
-#
-# Else, today_fmt is used as the format for a strftime call.
-#
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-# keep_warnings = False
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents.
-# "<project> v<release> documentation" by default.
-#
-# html_title = u'Singa v1.0.0'
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#
-html_logo = 'image/singa.png'
-
-# The name of an image file (relative to this directory) to use as a favicon of
-# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#
-# html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['../_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#
-# html_extra_path = []
-
-# If not None, a 'Last updated on:' timestamp is inserted at every page
-# bottom, using the given strftime format.
-# The empty string is equivalent to '%b %d, %Y'.
-#
-# html_last_updated_fmt = None
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-#
-# html_domain_indices = True
-
-# If false, no index is generated.
-#
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#
-html_show_sourcelink = False
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#
-# html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
-#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
-#
-# html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# 'ja' uses this config value.
-# 'zh' user can custom change `jieba` dictionary path.
-#
-# html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#
-# html_search_scorer = 'scorer.js'
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Singadoc'
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-     # The paper size ('letterpaper' or 'a4paper').
-     #
-     # 'papersize': 'letterpaper',
-
-     # The font size ('10pt', '11pt' or '12pt').
-     #
-     # 'pointsize': '10pt',
-
-     # Additional stuff for the LaTeX preamble.
-     #
-     # 'preamble': '',
-
-     # Latex figure (float) alignment
-     #
-     # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
-     u'moaz', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-#
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#
-# latex_appendices = []
-
-# If false, no module index is generated.
-#
-# latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     [author], 1)
-]
-
-# If true, show URL addresses after external links.
-#
-# man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     author, 'incubator-singa', 'One line description of project.',
-     'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-#
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#
-# texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#
-# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/en/develop/schedule.rst
----------------------------------------------------------------------
diff --git a/doc/en/develop/schedule.rst b/doc/en/develop/schedule.rst
index 2afe54f..2cf81f1 100644
--- a/doc/en/develop/schedule.rst
+++ b/doc/en/develop/schedule.rst
@@ -29,12 +29,11 @@ Development Schedule
 	"                  "," Installation            "," Remove dependency on ZeroMQ, CZMQ, Zookeeper for single node training","done"
 	"                  "," Updater                 "," Add new SGD updaters including Adam, AdamMax and AdaDelta","done"
 	"                  "," Binding                 "," Enhance Python binding for training","done"
-	"1.0 July 2016     "," Programming abstraction ","Tensor with linear algebra, neural net and random operations "," "
+	"1.0 Aug 2016     "," Programming abstraction ","Tensor with linear algebra, neural net and random operations "," "
 	"                  ","                         ","Updater for distributed parameter updating ",""
 	"                  "," Optimization            "," Execution and memory optimization",""
 	"                  "," Hardware                "," Use Cuda and Cudnn for Nvidia GPU",""
 	"                  ","                         "," Use OpenCL for AMD GPU or other devices",""
 	"                  "," Cross-platform          "," To extend from Linux to MacOS and Windows",""
 	"                  "," Examples                "," Speech recognition example",""
-	"                  ","                         ","Large image models, e.g., [GoogLeNet](http://arxiv.org/abs/1409.4842), [VGG](https://arxiv.org/pdf/1409.1556.pdf) and [Residual Net](http://arxiv.org/abs/1512.03385)",""
-	"     "," Rafiki                  "," Deep learning as a service "," "
+	"                  ","                         ","Large image models, e.g., [VGG](https://arxiv.org/pdf/1409.1556.pdf) and [Residual Net](http://arxiv.org/abs/1512.03385)",""

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/en/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/installation.md b/doc/en/docs/installation.md
index 5d3c8a2..bff8e89 100755
--- a/doc/en/docs/installation.md
+++ b/doc/en/docs/installation.md
@@ -29,7 +29,7 @@ Most of the dependent libraries could be installed via package mangers like
 apt-get or homebrew.
 
     # for ubuntu users, tested on 14.04
-    sudo apt-get install libprotobuf-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev, python2.7-dev, python-pip, python-numpy
+    sudo apt-get install libprotobuf-dev libopenblas-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev python2.7-dev python-pip python-numpy
 
     # for Mac OS users
     brew install -vd glog lmdb

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/image/overview.png
----------------------------------------------------------------------
diff --git a/doc/image/overview.png b/doc/image/overview.png
deleted file mode 100644
index a3244b3..0000000
Binary files a/doc/image/overview.png and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/image/sgd.png
----------------------------------------------------------------------
diff --git a/doc/image/sgd.png b/doc/image/sgd.png
deleted file mode 100644
index a0ec66f..0000000
Binary files a/doc/image/sgd.png and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/image/singa.png
----------------------------------------------------------------------
diff --git a/doc/image/singa.png b/doc/image/singa.png
deleted file mode 100644
index d9ce10f..0000000
Binary files a/doc/image/singa.png and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/zh/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/zh/_templates/layout.html b/doc/zh/_templates/layout.html
deleted file mode 100755
index 6b9f2c5..0000000
--- a/doc/zh/_templates/layout.html
+++ /dev/null
@@ -1,61 +0,0 @@
-{#
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements.  See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership.  The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-#}
-{% extends "!layout.html" %}
-
-{% block extrahead %}
-    <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
-{% endblock %}
-     
-{% block footer %}
-
-<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
-<a href="http://incubator.apache.org/">
-<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
-</a>
- 
-  <span class="rst-current-version" data-toggle="rst-current-version">
-    <span class="fa fa-book"> incubator-singa </span>
-    v: {{ version }}
-    <span class="fa fa-caret-down"></span>
-  </span>
-  <div class="rst-other-versions">
-    <dl>
-      <dt>Languages</dt>
-      <dd><a href="{{pathto(''+ '../index.html' , 1) }}">English</a></dd>
-      <dd><a href="">\u4e2d\u6587</a></dd>	  
-	  <!--dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
-	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
-	  <dd><a href="/it/latest/">Italiano</a></dd>
-	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd-->
-    </dl>
-    <dl>
-      <dt>Versions</dt>
-      <dd><a href="/{{ language }}/latest/">latest</a></dd>
-      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
-    </dl>
-  </div>
-</div>
-
- <a href="https://github.com/apache/incubator-singa">
-    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
-        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
-        alt="Fork me on GitHub">
-</a>
-
-{{ super() }}
-{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/zh/conf.py
----------------------------------------------------------------------
diff --git a/doc/zh/conf.py b/doc/zh/conf.py
deleted file mode 100755
index 921a27a..0000000
--- a/doc/zh/conf.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# incubator-singa documentation build configuration file, created by
-# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../../build/python'))
-
-# -- General configuration ------------------------------------------------
-from recommonmark.parser import CommonMarkParser
-
-source_parsers = {
-    '.md': CommonMarkParser,
-}
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
-napoleon_google_docstring = True
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = ['.rst', '.md']
-
-# The encoding of source files.
-#
-source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'incubator-singa'
-copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
-author = u'moaz'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = u'1.0.0'
-# The full version, including alpha/beta/rc tags.
-release = u'1.0.0'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#
-# today = ''
-#
-# Else, today_fmt is used as the format for a strftime call.
-#
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-# keep_warnings = False
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents.
-# "<project> v<release> documentation" by default.
-#
-# html_title = u'Singa v1.0.0'
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#
-html_logo = 'image/singa.png'
-
-# The name of an image file (relative to this directory) to use as a favicon of
-# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#
-# html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['../_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#
-# html_extra_path = []
-
-# If not None, a 'Last updated on:' timestamp is inserted at every page
-# bottom, using the given strftime format.
-# The empty string is equivalent to '%b %d, %Y'.
-#
-# html_last_updated_fmt = None
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-#
-# html_domain_indices = True
-
-# If false, no index is generated.
-#
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#
-html_show_sourcelink = False
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#
-# html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
-#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
-#
-# html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# 'ja' uses this config value.
-# 'zh' user can custom change `jieba` dictionary path.
-#
-# html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#
-# html_search_scorer = 'scorer.js'
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Singadoc'
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-     # The paper size ('letterpaper' or 'a4paper').
-     #
-     # 'papersize': 'letterpaper',
-
-     # The font size ('10pt', '11pt' or '12pt').
-     #
-     # 'pointsize': '10pt',
-
-     # Additional stuff for the LaTeX preamble.
-     #
-     # 'preamble': '',
-
-     # Latex figure (float) alignment
-     #
-     # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
-     u'moaz', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-#
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#
-# latex_appendices = []
-
-# If false, no module index is generated.
-#
-# latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     [author], 1)
-]
-
-# If true, show URL addresses after external links.
-#
-# man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     author, 'incubator-singa', 'One line description of project.',
-     'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-#
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#
-# texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#
-# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/zh/index.md b/doc/zh/index.md
deleted file mode 100644
index 4b49d5f..0000000
--- a/doc/zh/index.md
+++ /dev/null
@@ -1,9 +0,0 @@
-SINGA \u4e2d\u6587\u6587\u6863
-==============
-
-.. toctree::
-
-   overview
-   installation_source
-   programming-guide
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a144a610/doc/zh/index.rst
----------------------------------------------------------------------
diff --git a/doc/zh/index.rst b/doc/zh/index.rst
new file mode 100644
index 0000000..4b49d5f
--- /dev/null
+++ b/doc/zh/index.rst
@@ -0,0 +1,9 @@
+SINGA \u4e2d\u6587\u6587\u6863
+==============
+
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
+

[45/51] [abbrv] incubator-singa git commit: SINGA-240 Add license for singa source

Posted by wa...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1c5ca229/test/singa/test_timer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_timer.cc b/test/singa/test_timer.cc
index fa6c9af..2b3bd05 100644
--- a/test/singa/test_timer.cc
+++ b/test/singa/test_timer.cc
@@ -1,3 +1,23 @@
+/**
+ * Copyright 2015 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "gtest/gtest.h"
 #include "singa/utils/timer.h"

[17/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.

SINGA-237 New documentation files for SINGA v1.0

Added readme file for the cifar-10 examples.
Updated the uniform and gaussian methods in initializer.py to include the
fan_in and fan_out arguments.
Reformat some python files.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d3a57cfc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d3a57cfc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d3a57cfc

Branch: refs/heads/master
Commit: d3a57cfc2b71abadf992e9f0900a4051da8e4232
Parents: 8cd5530
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sun Aug 14 21:41:16 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sun Aug 14 21:41:16 2016 +0800

----------------------------------------------------------------------
 doc/docs/examples.rst           |   6 --
 doc/docs/index.rst              |   2 +-
 doc/docs/initializer.rst        |   2 +-
 examples/char-rnn/README.md     |   2 +-
 examples/char-rnn/train.py      | 103 +++++++++++++++++++++--------------
 examples/cifar10/alexnet.py     |  48 +++++++++++++---
 examples/cifar10/predict.py     |  10 ++--
 examples/cifar10/vgg.py         |  12 ++--
 examples/index.rst              |   4 ++
 src/python/singa/initializer.py |  85 ++++++++++++++---------------
 src/python/singa/optimizer.py   |   4 +-
 11 files changed, 157 insertions(+), 121 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
deleted file mode 100644
index b0b2af8..0000000
--- a/doc/docs/examples.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Examples
-========
-
-.. toctree::
-
-   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 2294054..11f0ebb 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -12,4 +12,4 @@ English
    loss
    metric
    optimizer
-   examples
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
index a190702..f334497 100644
--- a/doc/docs/initializer.rst
+++ b/doc/docs/initializer.rst
@@ -5,7 +5,7 @@ Python API
 ----------
 
 .. automodule:: singa.initializer
-   :members:
+   :members: uniform, gaussian
    :member-order: bysource
 
 CPP API

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/char-rnn/README.md
----------------------------------------------------------------------
diff --git a/examples/char-rnn/README.md b/examples/char-rnn/README.md
index d4cfa30..f6e5edc 100644
--- a/examples/char-rnn/README.md
+++ b/examples/char-rnn/README.md
@@ -1,4 +1,4 @@
-# Train Char-RNN using SINGA
+# Train Char-RNN over plain text
 
 Recurrent neural networks (RNN) are widely used for modelling sequential data,
 e.g., natural language sentences. This example describes how to implement a RNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index fb5e71f..1273a57 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -19,8 +19,6 @@ The model is created following https://github.com/karpathy/char-rnn
 The train file could be any text file,
 e.g., http://cs.stanford.edu/people/karpathy/char-rnn/
 '''
-import sys
-import os
 import cPickle as pickle
 import numpy as np
 import argparse
@@ -32,12 +30,12 @@ from singa import device
 from singa import tensor
 from singa import optimizer
 from singa import initializer
-from singa.proto import core_pb2
 from singa.proto import model_pb2
 from singa import utils
 
 
 class Data(object):
+
     def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):
         '''Data object for loading a plain text file.
 
@@ -49,8 +47,8 @@ class Data(object):
         self.raw_data = open(fpath, 'r').read()  # read text file
         chars = list(set(self.raw_data))
         self.vocab_size = len(chars)
-        self.char_to_idx = {ch:i for i, ch in enumerate(chars)}
-        self.idx_to_char = {i:ch for i, ch in enumerate(chars)}
+        self.char_to_idx = {ch: i for i, ch in enumerate(chars)}
+        self.idx_to_char = {i: ch for i, ch in enumerate(chars)}
         data = [self.char_to_idx[c] for c in self.raw_data]
         # seq_length + 1 for the data + label
         nsamples = len(data) / (1 + seq_length)
@@ -69,10 +67,10 @@ class Data(object):
 
 def numpy2tensors(npx, npy, dev):
     '''batch, seq, dim -- > seq, batch, dim'''
-    tmpx=np.swapaxes(npx, 0, 1)
-    tmpy=np.swapaxes(npy, 0, 1)
-    inputs=[]
-    labels=[]
+    tmpx = np.swapaxes(npx, 0, 1)
+    tmpy = np.swapaxes(npy, 0, 1)
+    inputs = []
+    labels = []
     for t in range(tmpx.shape[0]):
         x = tensor.from_numpy(tmpx[t])
         y = tensor.from_numpy(tmpy[t])
@@ -99,25 +97,36 @@ def get_lr(epoch):
     return 0.001 / float(1 << (epoch / 50))
 
 
-def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
-        num_stacks=1, lr=0.001, dropout = 0.5, model_path='model.bin'):
+def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
+          num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'):
     # SGD with L2 gradient normalization
     opt = optimizer.SGD(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
-    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size, num_stacks=num_stacks,
-            dropout=dropout, input_sample_shape=(data.vocab_size,))
+    rnn = layer.LSTM(
+        name='lstm',
+        hidden_size=hidden_size,
+        num_stacks=num_stacks,
+        dropout=dropout,
+        input_sample_shape=(
+            data.vocab_size,
+        ))
     rnn.to_device(cuda)
     print 'created rnn'
     rnn_w = rnn.param_values()[0]
-    initializer.uniform(rnn_w, -0.08, 0.08)  # init all rnn parameters
+    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
     print 'rnn weight l1 = %f' % (rnn_w.l1())
-    dense = layer.Dense('dense', data.vocab_size, input_sample_shape=(hidden_size,))
+    dense = layer.Dense(
+        'dense',
+        data.vocab_size,
+        input_sample_shape=(
+            hidden_size,
+        ))
     dense.to_device(cuda)
     dense_w = dense.param_values()[0]
     dense_b = dense.param_values()[1]
     print 'dense w ', dense_w.shape
     print 'dense b ', dense_b.shape
-    initializer.xavier(dense_w) # init weight matrix using Xavier
+    initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
     dense_b.set_value(0.0)
     print 'dense b l1 = %f' % (dense_b.l1())
@@ -125,18 +134,18 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)
     g_dense_b = tensor.Tensor(dense_b.shape, cuda)
 
-    lossfun = loss.SoftmaxCrossEntropy();
+    lossfun = loss.SoftmaxCrossEntropy()
     for epoch in range(max_epoch):
         train_loss = 0
         for b in range(data.num_train_batch):
             batch = data.train_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
 
             outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
-            grads=[]
+            grads = []
             batch_loss = 0
             g_dense_w.set_value(0.0)
             g_dense_b.set_value(0.0)
@@ -149,52 +158,62 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
                 grads.append(grad)
                 g_dense_w += gwb[0]
                 g_dense_b += gwb[1]
-                #print output.l1(), act.l1()
-            utils.update_progress(b * 1.0 / data.num_train_batch,
-                    'training loss = %f' % (batch_loss / seq_length))
+                # print output.l1(), act.l1()
+            utils.update_progress(
+                b * 1.0 / data.num_train_batch, 'training loss = %f' %
+                (batch_loss / seq_length))
             train_loss += batch_loss
 
             grads.append(tensor.Tensor())
             grads.append(tensor.Tensor())
-            g_rnn_w=rnn.backward(model_pb2.kTrain, grads)[1][0]
+            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
             dense_w, dense_b = dense.param_values()
             opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')
-        print '\nEpoch %d, train loss is %f' % (epoch,
-                train_loss / data.num_train_batch / seq_length)
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_w, dense_w, 'dense_w')
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_b, dense_b, 'dense_b')
+        print '\nEpoch %d, train loss is %f' % \
+            (epoch, train_loss / data.num_train_batch / seq_length)
+
         eval_loss = 0
         for b in range(data.num_test_batch):
             batch = data.val_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
             outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
             for output, label in zip(outputs, labels):
                 output = dense.forward(model_pb2.kEval, output)
-                eval_loss += lossfun.forward(model_pb2.kEval, output, label).l1()
-        print 'Epoch %d, evaluation loss is %f' % (epoch,
-                eval_loss / data.num_test_batch / seq_length)
+                eval_loss += lossfun.forward(model_pb2.kEval,
+                                             output, label).l1()
+        print 'Epoch %d, evaluation loss is %f' % \
+            (epoch, eval_loss / data.num_test_batch / seq_length)
 
     # checkpoint the file model
     with open(model_path, 'wb') as fd:
         print 'saving model to %s' % model_path
-        d={}
-        for name, w in zip(['rnn_w', 'dense_w', 'dense_b'], [rnn_w, dense_w, dense_b]):
+        d = {}
+        for name, w in zip(
+                ['rnn_w', 'dense_w', 'dense_b'],
+                [rnn_w, dense_w, dense_b]):
             w.to_host()
-            d[name]=tensor.to_numpy(w)
-        d['idx_to_char']=data.idx_to_char
-        d['char_to_idx']=data.char_to_idx
-        d['hidden_size']=hidden_size
-        d['num_stacks']=num_stacks
-        d['dropout']=dropout
+            d[name] = tensor.to_numpy(w)
+        d['idx_to_char'] = data.idx_to_char
+        d['char_to_idx'] = data.char_to_idx
+        d['hidden_size'] = hidden_size
+        d['num_stacks'] = num_stacks
+        d['dropout'] = dropout
 
         pickle.dump(d, fd)
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Train multi-stack LSTM for '\
-            'modeling  character sequence from plain text files')
+    parser = argparse.ArgumentParser(
+        description='Train multi-stack LSTM for '
+        'modeling  character sequence from plain text files')
     parser.add_argument('data', type=str, help='training file')
     parser.add_argument('-b', type=int, default=32, help='batch_size')
     parser.add_argument('-l', type=int, default=64, help='sequence length')
@@ -204,4 +223,4 @@ if __name__ == '__main__':
     args = parser.parse_args()
     data = Data(args.data, batch_size=args.b, seq_length=args.l)
     train(data, args.m,  hidden_size=args.d, num_stacks=args.s,
-            seq_length=args.l, batch_size=args.b)
+          seq_length=args.l, batch_size=args.b)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..34da95d 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,12 +20,8 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
 validation accuracy would be about 82%.
 '''
 
-import sys
-import os
-
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
-from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
@@ -40,23 +36,57 @@ def create_net(use_cpu=False):
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
     W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
     b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
-    net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
+    net.add(
+        layer.Conv2D(
+            'conv1',
+            32,
+            5,
+            1,
+            W_specs=W0_specs.copy(),
+            b_specs=b_specs.copy(),
+            pad=2,
+            input_sample_shape=(
+                3,
+                32,
+                32,
+            )))
     net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
     net.add(layer.Activation('relu1'))
     net.add(layer.LRN(name='lrn1'))
-    net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv2',
+            32,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu2'))
     net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
     net.add(layer.LRN('lrn2'))
-    net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv3',
+            64,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu3'))
     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+    net.add(
+        layer.Dense(
+            'dense',
+            10,
+            W_specs=W2_specs.copy(),
+         b_specs=b_specs.copy()))
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         filler = specs.filler
         if filler.type == 'gaussian':
-            initializer.gaussian(p, filler.mean, filler.std)
+            p.gaussian(filler.mean, filler.std)
         else:
             p.set_value(0)
         print specs.name, filler.type, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index 8a9ea4e..307a610 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -16,28 +16,26 @@
 # =============================================================================
 import cPickle as pickle
 import numpy as np
-import sys
-import os
 
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import device
 from singa import tensor
 import net as ffnet
 
 
-def predict(net, images, cuda, topk=5):
+def predict(net, images, dev, topk=5):
     '''Predict the label of each image.
 
     Args:
         net, a pretrained neural net
         images, a batch of images [batch_size, 3, 32, 32], which have been
             pre-processed
-        cuda, the cuda device
+        dev, the training device
         topk, return the topk labels for each image.
     '''
     x = tensor.from_numpy(images.astype(np.float32))
-    x.to_device(cuda)
+    x.to_device(dev)
     y = net.predict(x)
     y.to_host()
     y = tensor.to_numpy(y)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 327592f..29a4b40 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -20,11 +20,7 @@ The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
 
-import sys
-import os
-import math
-
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import layer
 from singa import initializer
@@ -86,11 +82,11 @@ def create_net(use_cpu=False):
             elif 'var' in name:
                 p.set_value(1.0)
             elif 'gamma' in name:
-                initializer.uniform(p, 0, 1)
+                p.uniform(0, 1)
             elif 'conv' in name:
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+                initializer.gaussian(p, 0, 3 * 3 * p.shape[0])
             else:
-                initializer.gaussian(p, 0, 0.02)
+                p.gaussian(0, 0.02)
         else:
             p.set_value(0)
         print name, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index d6faf5d..4bb5b49 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,5 +1,9 @@
+Examples
+========
+
 .. toctree::
 
+   cifar10/README
    char-rnn/README
    imagenet/README
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 277fd2f..fb99663 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -23,77 +23,68 @@ Example usages::
     from singa import initializer
 
     x = tensor.Tensor((3, 5))
-    initializer.xavier(x)
+    initializer.uniform(x, 3, 5) # use both fan_in and fan_out
+    initializer.uniform(x, 3, 0)  # use only fan_in
 '''
 
 import math
 
 
-'''
-TODO(wangwei) update the uniform and gaussian initializers
-
 def uniform(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
-    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    '''Initialize the values of the input tensor following a uniform
+    distribution with specific bounds.
+
+    Args:
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
+
+    Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
     training deep feedforward neuralnetworks.
 
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+        avg = 1
+    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
     t.uniform(-x, x)
 
 
 def gaussian(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
+    '''Initialize the values of the input tensor following a Gaussian
+    distribution with specific std.
+
+    Args:
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
 
     Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
     Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
-
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+        avg = 1
+    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
     t.gaussian(0, std)
-'''
-
-
-def uniform(t, low=0, high=1):
-    '''Initialize the parameter values following an Uniform distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        low (float): lower bound
-        high (float): higher bound
-    '''
-    t.uniform(low, high)
-
-
-def gaussian(t, mean=0, std=0.01):
-    '''Initialize the parameter values following an Gaussian distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        mean (float): mean of the distribution
-        std (float): standard variance
-    '''
-    t.gaussian(mean, std)
 
 
 def xavier(t):
     '''Initialize the matrix parameter follow a Uniform distribution from
     [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
 
+    Deprecated. Please use uniform()
+
     Args:
         t (Tensor): the parater tensor
     '''
@@ -106,6 +97,8 @@ def glorot(t):
     '''Initialize the matrix parameter follow a Gaussian distribution with
     mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
 
+    Deprecated. Please use gaussian()
+
     Args:
         t (Tensor): the parater tensor
     '''
@@ -118,6 +111,8 @@ def msra(t):
     '''Initialize the matrix parameter follow a Guassian distribution with
     mean = 0, std = math.sqrt(2.0 / nb_row).
 
+    Deprecated. Please use gaussian()
+
     Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
     nonlinearities.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 5d38997..7c8cc39 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -44,8 +44,8 @@ class Optimizer(object):
 
     1. construct the optimizer
     2. (optional) register each parameter with its specs.
-    3. use the optimizer to update parameter values given parameter
-        gradients and other optional info
+    3. use the optimizer to update parameter values given parameter gradients
+       and other optional info
 
     The subclasses should override the apply_with_lr function to do the real
     parameter udpate.

[14/51] [abbrv] incubator-singa git commit: Merge PR #228 which updates cmake files to enable 'make install'.

Posted by wa...@apache.org.

Merge PR #228 which updates cmake files to enable 'make install'.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/dffae6bf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/dffae6bf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/dffae6bf

Branch: refs/heads/master
Commit: dffae6bf33b316a49ed1315e439b8e8948552f1c
Parents: a91bf2a 230230c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sun Aug 14 13:59:35 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sun Aug 14 13:59:35 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt           |  7 +++++++
 cmake/Dependencies.cmake |  7 +++++++
 src/CMakeLists.txt       | 10 +++++-----
 test/CMakeLists.txt      |  5 ++---
 4 files changed, 21 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dffae6bf/CMakeLists.txt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dffae6bf/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --cc cmake/Dependencies.cmake
index ceef429,4db8a85..eb729db
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@@ -5,9 -5,16 +5,16 @@@ SET(SINGA_LINKER_LIBS ""
  FIND_PACKAGE( Protobuf REQUIRED )
  INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR})
  MESSAGE(STATUS "proto libs " ${PROTOBUF_LIBRARIES})
 -LIST(APPEND singa_linker_libs ${PROTOBUF_LIBRARIES})
 +LIST(APPEND SINGA_LINKER_LIBS ${PROTOBUF_LIBRARIES})
  INCLUDE("cmake/Protobuf.cmake")
  
+ #FIND_PACKAGE(Glog)
+ #IF(GLOG_FOUND)
+ #    MESSAGE(STATUS "GLOG FOUND at ${GLOG_INCLUDE_DIR}")
+ #    ADD_DEFINITIONS("-DUSE_GLOG")
+ #    LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
+ #ENDIF()
+ 
  IF(USE_LMDB)
      FIND_PACKAGE(LMDB REQUIRED)
      INCLUDE_DIRECTORIES(SYSTEM ${LMDB_INCLUDE_DIR})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dffae6bf/src/CMakeLists.txt
----------------------------------------------------------------------
diff --cc src/CMakeLists.txt
index 66d89dc,f6fa698..4579a67
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@@ -20,10 -19,8 +20,10 @@@ FOREACH(fil ${proto_hdrs}
          #COMMAND ${CMAKE_COMMAND} -E echo "copy done"
          )
  ENDFOREACH()
- LIST(APPEND SINGA_LINKER_LIBS proto)
+ LIST(APPEND SINGA_LINKER_LIBS singa_proto)
  
 +SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS})
 +
  #FILE(GLOB_RECURSE utils_source ${CMAKE_CURRENT_SOURCE_DIR}/utils/ "*.cc")
  AUX_SOURCE_DIRECTORY(utils utils_source)
  #message(STATUS "UTILS ${utils_source}")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dffae6bf/test/CMakeLists.txt
----------------------------------------------------------------------
diff --cc test/CMakeLists.txt
index f196928,1c2550b..6fc4d77
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@@ -17,11 -9,9 +17,10 @@@ IF(NOT USE_OPENCL
      LIST(REMOVE_ITEM singa_test_source "singa/test_opencl.cc")
  ENDIF()
  
 +
  ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
  ADD_DEPENDENCIES(test_singa singa_core singa_utils)
 -MESSAGE(STATUS "link libs" ${singa_linker_libs})
 +#MESSAGE(STATUS "link libs" ${singa_linker_libs})
  TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
-     singa_io proto protobuf ${SINGA_LINKER_LIBS})
- SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread ")
- 
+     singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
+ SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")

[15/51] [abbrv] incubator-singa git commit: SINGA-238 RBM on MNIST

Posted by wa...@apache.org.

SINGA-238 RBM on MNIST

Enable the training on GPU.
Fixed a bug from KernelSum() by removing it and implemented the
Sum(Tensor)->float function using Dot() (from blas).


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5b332a40
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5b332a40
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5b332a40

Branch: refs/heads/master
Commit: 5b332a4086ff32b0c3a298169c0befef78f003ca
Parents: e1a524d
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Aug 14 17:07:22 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 17:07:22 2016 +0800

----------------------------------------------------------------------
 examples/mnist/README.md           |  21 ++-
 examples/mnist/train.py            | 265 ++++++++++++++++----------------
 include/singa/model/loss.h         |   1 -
 src/core/tensor/math_kernel.cu     |   5 +
 src/core/tensor/math_kernel.h      |   2 +-
 src/core/tensor/tensor.cc          |  10 +-
 src/core/tensor/tensor_math_cuda.h |   5 +-
 src/python/singa/optimizer.py      |   1 +
 8 files changed, 169 insertions(+), 141 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/examples/mnist/README.md
----------------------------------------------------------------------
diff --git a/examples/mnist/README.md b/examples/mnist/README.md
index bfd480f..9f59e7e 100644
--- a/examples/mnist/README.md
+++ b/examples/mnist/README.md
@@ -1,3 +1,18 @@
-This example is to train an RBM model using mnist data set. This RBM follows paper http://www.cs.toronto.edu/~hinton/science.pdf and the source code for this paper can be found http://www.cs.toronto.edu/~hinton/MatlabForSciencePaper.html
-1. Download dataset mnist.pkl.gz from https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz
-2. $ python train.py
+# Train a RBM model against MNIST dataset
+
+This example is to train an RBM model using the
+MNIST dataset. The RBM model and its hyper-parameters are set following
+[Hinton's paper](http://www.cs.toronto.edu/~hinton/science.pdf)
+
+## Running instructions
+
+1. Download the pre-processed [MNIST dataset](https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz)
+
+2. Start the training
+
+        python train.py
+
+By default the training code would run on CPU. To run it on a GPU card, please start
+the program with an additional argument
+
+        python train.py --use_gpu

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 52b023a..43b8e26 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -1,131 +1,134 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-import cPickle
-import numpy as np
-import numpy.matlib
-import os
-import sys
-import gzip, numpy
-
-
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '../../build/python'))
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '../../build/lib'))
-sys.path.append(os.path.join(os.path.dirname(__file__),'../../build/src'))
-from singa import initializer
-from singa import utils
-from singa import optimizer
-from singa import device
-from singa import tensor
-from singa.proto import core_pb2
-
-
-
-def load_train_data(dir_path):
-    f = gzip.open(dir_path, 'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    traindata = train_set[0].astype(np.float32)
-    validdata = valid_set[0].astype(np.float32)
-    return traindata, validdata
-
-
-
-def train(data_dir, num_epoch=10, batch_size=100):
-    print 'Start intialization............'
-    lr = 0.1   # Learning rate
-    weight_decay  = 0.0002
-    hdim = 1000
-    vdim = 784
-    opt = optimizer.SGD(momentum=0.8, weight_decay=weight_decay)
-    
-    shape = (vdim, hdim)
-    tweight = tensor.Tensor(shape)
-    initializer.gaussian(tweight, 0.0, 0.1)
-    tvbias = tensor.from_numpy(np.zeros(vdim, dtype = np.float32))
-    thbias = tensor.from_numpy(np.zeros(hdim, dtype = np.float32))
-    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)
-
-    print 'Loading data ..................'
-    train_x, valid_x = load_train_data(data_dir)
-
-    num_train_batch = train_x.shape[0]/batch_size
-    print "num_train_batch = \n", num_train_batch
-    for epoch in range(num_epoch):
-        trainerrorsum = 0.0
-        validerrorsum = 0.0
-        print 'Epoch %d' % epoch
-        for b in range(num_train_batch):
-            # positive phase
-            if b % 100 == 0:
-                print "batch: \n", b
-
-            tdata = tensor.from_numpy(train_x[ (b * batch_size): ((b + 1) * batch_size), : ])
-            tposhidprob = tensor.mult(tdata, tweight)
-            tposhidprob.add_row(thbias)
-            tposhidprob = tensor.sigmoid(tposhidprob)
-            tposhidrandom = tensor.Tensor(tposhidprob.shape)
-            initializer.uniform(tposhidrandom, 0.0, 1.0)
-            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)
-            
-            # negative phase
-            tnegdata = tensor.mult(tposhidsample, tweight.transpose())
-            tnegdata.add_row(tvbias)
-            tnegdata = tensor.sigmoid(tnegdata)
-
-            tneghidprob = tensor.mult(tnegdata, tweight)
-            tneghidprob.add_row(thbias) 
-            tneghidprob = tensor.sigmoid(tneghidprob)
-            trainerror = tensor.sum(tensor.eltwise_mult((tdata - tnegdata),(tdata - tnegdata)))
-            trainerrorsum = trainerror + trainerrorsum
-           
-            tgweight = tensor.mult(tnegdata.transpose(), tneghidprob) - tensor.mult(tdata.transpose(), tposhidprob)
-            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
-            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
-            
-            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, '')
-            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, '')
-            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, '')
-
-        info = 'train errorsum = %f' \
-            % (trainerrorsum)
-        print info
-
-        tvaliddata = tensor.from_numpy(valid_x[ :, : ])
-        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
-        tvalidposhidprob.add_row(thbias)
-        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
-        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape)
-        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
-        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
-
-        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.transpose())
-        tvalidnegdata.add_row(tvbias)
-        tvalidnegdata = tensor.sigmoid(tvalidnegdata)
-
-        validerrorsum = tensor.sum(tensor.eltwise_mult((tvaliddata - tvalidnegdata),(tvaliddata - tvalidnegdata)))
-        validinfo = 'valid errorsum = %f' \
-            % (validerrorsum)
-        print validinfo
-
-
-if __name__ == '__main__':
-    data_dir = 'mnist.pkl.gz'
-    assert os.path.exists(data_dir), \
-        'Pls download the mnist dataset'
-    train(data_dir)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import numpy as np
+import os
+import gzip
+import argparse
+import cPickle
+from singa import initializer
+from singa import utils
+from singa import optimizer
+from singa import device
+from singa import tensor
+
+
+from singa.proto import core_pb2
+
+
+
+def load_train_data(file_path):
+    f = gzip.open(file_path, 'rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    traindata = train_set[0].astype(np.float32)
+    validdata = valid_set[0].astype(np.float32)
+    print traindata.shape, validdata.shape
+    return traindata, validdata
+
+
+
+def train(data_file, use_gpu, num_epoch=10, batch_size=100):
+    print 'Start intialization............'
+    lr = 0.1   # Learning rate
+    weight_decay  = 0.0002
+    hdim = 1000
+    vdim = 784
+    opt = optimizer.SGD(momentum=0.8, weight_decay=weight_decay)
+
+    tweight = tensor.Tensor((vdim, hdim))
+    tweight.gaussian(0.0, 0.1)
+    tvbias = tensor.from_numpy(np.zeros(vdim, dtype = np.float32))
+    thbias = tensor.from_numpy(np.zeros(hdim, dtype = np.float32))
+    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)
+
+    print 'Loading data ..................'
+    train_x, valid_x = load_train_data(data_file)
+
+    if use_gpu:
+        dev = device.create_cuda_gpu()
+    else:
+        dev = device.get_default_device()
+
+    for t in [tweight, tvbias, thbias]:
+        t.to_device(dev)
+
+    num_train_batch = train_x.shape[0] / batch_size
+    print "num_train_batch = %d " % (num_train_batch)
+    for epoch in range(num_epoch):
+        trainerrorsum = 0.0
+        validerrorsum = 0.0
+        print 'Epoch %d' % epoch
+        for b in range(num_train_batch):
+            # positive phase
+            tdata = tensor.from_numpy(
+                    train_x[(b * batch_size):((b + 1) * batch_size), : ])
+            tdata.to_device(dev)
+            tposhidprob = tensor.mult(tdata, tweight)
+            tposhidprob.add_row(thbias)
+            tposhidprob = tensor.sigmoid(tposhidprob)
+            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
+            tposhidrandom.uniform(0.0, 1.0)
+            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)
+
+            # negative phase
+            tnegdata = tensor.mult(tposhidsample, tweight.transpose())
+            tnegdata.add_row(tvbias)
+            tnegdata = tensor.sigmoid(tnegdata)
+
+            tneghidprob = tensor.mult(tnegdata, tweight)
+            tneghidprob.add_row(thbias)
+            tneghidprob = tensor.sigmoid(tneghidprob)
+            error = tensor.sum(tensor.square((tdata - tnegdata)))
+            trainerrorsum = error + trainerrorsum
+
+            tgweight = tensor.mult(tnegdata.transpose(), tneghidprob) -\
+                    tensor.mult(tdata.transpose(), tposhidprob)
+            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
+            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
+
+            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
+            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
+            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')
+
+        print 'training errorsum = %f' % (trainerrorsum)
+
+        tvaliddata = tensor.from_numpy(valid_x)
+        tvaliddata.to_device(dev)
+        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
+        tvalidposhidprob.add_row(thbias)
+        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
+        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
+        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
+        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
+
+        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.transpose())
+        tvalidnegdata.add_row(tvbias)
+        tvalidnegdata = tensor.sigmoid(tvalidnegdata)
+
+        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
+        print 'valid errorsum = %f' % (validerrorsum)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Train RBM over MNIST')
+    parser.add_argument('file', type=str, help='the dataset path')
+    parser.add_argument('--use_gpu', action='store_true')
+    args = parser.parse_args()
+
+    assert os.path.exists(args.file), 'Pls download the MNIST dataset from' \
+            'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
+    train(args.file, args.use_gpu)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/include/singa/model/loss.h
----------------------------------------------------------------------
diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h
index 951c477..4ee41cb 100644
--- a/include/singa/model/loss.h
+++ b/include/singa/model/loss.h
@@ -51,7 +51,6 @@ public:
   /// [Evaluate|Forward] Backward.
   float Evaluate(int flag, const Tensor &prediction, const Tensor &target) {
     Tensor loss = Forward(flag, prediction, target);
-    loss.ToHost();
     return Sum<float>(loss) / (1.0f * loss.Size());
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu
index e0112f3..d3f3335 100644
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@ -35,6 +35,8 @@
 namespace singa {
 // Cuda Kernel Functions
 namespace cuda {
+/*
+wangwei: Not used due to error in the code.
 __global__ void KernelSum(const size_t n, const float *in, float *out) {
   int THREADS = blockDim.x;
 
@@ -65,6 +67,7 @@ __global__ void KernelSum(const size_t n, const float *in, float *out) {
   __syncthreads();
   *out = aux[0];
 }
+*/
 
 __global__ void KernelAdd(const size_t n, const float *in1, const float *in2,
                           float *out) {
@@ -461,12 +464,14 @@ void div(const size_t n, const float *in1, const float *in2, float *out,
   KernelDiv <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);
 }
 
+/*
 void sum(const size_t n, const float *in, float *out, cudaStream_t s) {
   int threads_per_block = n > CU1DBLOCK ? CU1DBLOCK : n;
   //  here, we only need one block
   int num_blocks = 1;
   KernelSum <<<num_blocks, threads_per_block>>> (n, in, out);
 }
+*/
 
 void ComputeCrossEntropy(size_t batchsize, const size_t dim, const float *p,
                          const int *t, float *loss, cudaStream_t stream) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h
index 202777e..cb0cb6a 100644
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@ -101,7 +101,7 @@ void mult(const size_t n, const float *in1, const float *in2, float *out,
 void div(const size_t n, const float *in1, const float *in2, float *out,
          cudaStream_t s);
 
-void sum(const size_t n, const float *in, float *out, cudaStream_t s);
+// void sum(const size_t n, const float *in, float *out, cudaStream_t s);
 
 void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
                          const float *p, const int *t, float *loss,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index b80e233..670b27e 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -626,10 +626,14 @@ Tensor Average(const Tensor &M, int axis) {
 template <>
 float Sum<float>(const Tensor &in) {
   float s = 0.0f;
+  Tensor one(in.shape(), in.device(), in.data_type());
+  one.SetValue(1.0f);
   TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
-    in.device()->Exec([in, &s](Context *ctx) {
-      Sum<DType, Lang>(in.Size(), in.block(), &s, ctx);
-    }, {in.block()}, {});
+    one.device()->Exec([in, one, &s](Context *ctx) {
+      DType ret = DType(0);
+      Dot<DType, Lang>(in.Size(), in.block(), one.block(), &ret, ctx);
+      s = ret;
+    }, {in.block(), one.block()}, {});
   });
   return s;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 1cd61b3..4daa97a 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -263,8 +263,9 @@ void Sub<float, lang::Cuda>(const size_t num, const Block* in1,
 template <>
 void Sum<float, lang::Cuda>(const size_t num, const Block* in, float* out,
                             Context* ctx) {
-  const float* inPtr = static_cast<const float*>(in->data());
-  cuda::sum(num, inPtr, out, ctx->stream);
+  LOG(FATAL) << "Cuda Sum is not implemented!";
+  // const float* inPtr = static_cast<const float*>(in->data());
+  // cuda::sum(num, inPtr, out, ctx->stream);
 }
 
 /// Element-wise operation, out[i]=tanh([in[i])

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5b332a40/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 7cab746..aa6bdd1 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -187,6 +187,7 @@ class SGD(Optimizer):
         """
         super(SGD, self).__init__(lr, momentum, decay)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = momentum
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())