Posted to commits@singa.apache.org by mo...@apache.org on 2018/05/18 04:52:07 UTC

[01/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Repository: incubator-singa
Updated Branches:
  refs/heads/master 6bcd5d0e9 -> 6d7d629bf


SINGA-349 Create layer operations for autograd

1. Layer operations work well.
2. Next steps (a usage sketch follows below):
  - change the API to PyTorch style.
  - add flags to the forward and backward functions.
  - support configurable initialization methods.
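A minimal usage sketch of the operations added in layer_ops.py below, assuming the new module is importable as singa.layer_ops; the input shape and kernel count are illustrative, and every operation returns a tuple of output tensors, hence the trailing [0]:

from singa import tensor
from singa import layer_ops

# dummy 4-D input: batch of 10, 2 channels, 3x3 spatial size (illustrative only)
x = tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)
x.gaussian(0.0, 1.0)

conv = layer_ops.Convolution2D('conv', 4)   # Keras-style args: name, nb_kernels, ...
relu = layer_ops.Activation('relu')
pool = layer_ops.MaxPooling2D('pool')
flat = layer_ops.Flatten('flatten')

y = conv(x)[0]     # each __call__ sets the wrapped layer up lazily on the first batch
y = relu(y)[0]
y = pool(y)[0]
y = flat(y)[0]
print(y.shape)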


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5abcc6e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5abcc6e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5abcc6e0

Branch: refs/heads/master
Commit: 5abcc6e0dd803fa8a631d957765891cf5e836ec3
Parents: e4610e0
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Fri May 4 17:43:02 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:06 2018 +0800

----------------------------------------------------------------------
 python/singa/layer_ops.py | 158 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5abcc6e0/python/singa/layer_ops.py
----------------------------------------------------------------------
diff --git a/python/singa/layer_ops.py b/python/singa/layer_ops.py
new file mode 100644
index 0000000..1ca888f
--- /dev/null
+++ b/python/singa/layer_ops.py
@@ -0,0 +1,158 @@
+from singa import tensor
+from singa import layer
+from singa.proto import model_pb2
+from singa import autograd
+
+
+
+def ctensor2numpy(x):
+    '''
+    // For test use.
+
+
+    To be used in SoftMax Operation.
+    Convert a singa_tensor to numpy_tensor.
+    '''
+    np_array = x.GetFloatValue(int(x.Size()))
+    return np_array.reshape(x.shape())
+
+class Convolution2D(tensor.Operation):
+    def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
+                 cudnn_prefer='fastest', workspace_byte_limit=1024,
+                 data_format='NCHW', use_bias=True, W_specs=None, b_specs=None,
+                 pad=None,input_sample_shape=None):
+        '''
+        How to match Keras:
+
+        in Keras conv2d, self.kernel record how to generate kernel (shape,initializer,name,regularizer,constraint),
+        it can be interpret to
+        shape -> kernel+input_sample_shape[0](nb_channels)+nb_kernels,
+        initializer, name, regularizer, constraint -> W_specs.
+        '''
+        self.PyLayer = layer.Conv2D(name, nb_kernels, kernel=kernel, stride=stride, border_mode=border_mode,
+                 cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
+                 data_format=data_format, use_bias=use_bias, W_specs=W_specs, b_specs=b_specs,
+                 pad=pad, input_sample_shape=input_sample_shape)
+
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        param_data = self.PyLayer.layer.param_values()
+        if not hasattr(self, 'w'):
+            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            self.w.gaussian(0.0, 0.1)  # TODO realize other initialization method according to W_specs
+        
+        xs = [x, self.w]
+
+        if len(param_data) == 2:
+            self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+            self.b.set_value(0.0)  # TODO realize other initialization method according to b_specs
+            xs.append(self.b)
+
+        xs = tuple(xs)
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(4, xs[0])  #how ktrain works?  flag & ktrain.
+
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(0, dy)
+        return (ret[0],)+ret[1]
+
+
+class MaxPooling2D(tensor.Operation):
+    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
+                 data_format='NCHW', input_sample_shape=None):
+
+        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
+                                           kernel, stride, border_mode,
+                                           pad, data_format, input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, x):
+        return self.PyLayer.layer.Forward(4, x)
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]   # how backward() return?
+
+
+class Activation(tensor.Operation):
+    def __init__(self,name, mode='relu',input_sample_shape=None):
+        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, x):
+        return self.PyLayer.layer.Forward(4, x)
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class Flatten(tensor.Operation):
+    def __init__(self, name, axis=1, input_sample_shape=None):
+        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, x):
+        return self.PyLayer.layer.Forward(4, x)
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class Dense(tensor.Operation):
+    def __init__(self, name, num_output, use_bias=True,
+                     W_specs=None, b_specs=None,
+                     W_transpose=False, input_sample_shape=None):
+
+        self.PyLayer = layer.Dense(name, num_output=num_output, use_bias=use_bias,
+                     W_specs=W_specs, b_specs=b_specs,
+                     W_transpose=W_transpose, input_sample_shape=input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        param_data = self.PyLayer.layer.param_values()
+
+        if not hasattr(self, 'w'):
+            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            self.w.gaussian(0.0, 0.1)  # TODO realize other initialization method according to W_specs
+
+        xs = [x, self.w]
+
+        if len(param_data) == 2:
+            self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+            self.b.set_value(0.0)  # TODO realize other initialization method according to b_specs
+            xs.append(self.b)
+
+        xs = tuple(xs)
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(4, xs[0])
+
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(0, dy)
+        return (ret[0],)+ret[1]
+
+
+
+
+
+
+
+


[05/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Realize a simple convolutional network based on autograd for test use.

2. The code runs on my machine; the training effect is clear and the network parameters are interpretable.
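Condensed, the training pattern in the example below looks like this; a sketch with random stand-in data (the real script loads mnist.npz, adds a bias term, and wraps the forward pass in a function):

from singa import tensor
from singa import autograd
from singa import optimizer
from singa import convolution_operation as layer_ops

sgd = optimizer.SGD(0.05)

conv = layer_ops.Convolution2D('conv', 4, 3, 1, border_mode='same')
flatten = layer_ops.Flatten('flatten')

# the dense part stays as raw tensor operations, as in the example below
w = tensor.Tensor(shape=(4 * 8 * 8, 10), requires_grad=True, stores_grad=True)
w.gaussian(0.0, 0.1)

# random stand-in batch (2 samples, 1 channel, 8x8) and random targets
x = tensor.Tensor(shape=(2, 1, 8, 8), requires_grad=False, stores_grad=False)
x.gaussian(0.0, 1.0)
t = tensor.Tensor(shape=(2, 10), requires_grad=False, stores_grad=False)
t.gaussian(0.0, 0.1)

# forward: build the graph; layer operations return tuples, so take [0]
y = conv(x)[0]
y = flatten(y)[0]
y = tensor.softmax(tensor.matmul(y, w))
loss = tensor.cross_entropy(y, t)

# backward: autograd.backward(loss) returns {parameter Tensor: gradient};
# the optimizer then applies each update in place
in_grads = autograd.backward(loss)
for param in in_grads:
    sgd.apply(0, in_grads[param], param, '')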


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e4610e03
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e4610e03
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e4610e03

Branch: refs/heads/master
Commit: e4610e0379d387169c0aaf4d81f050318933ff09
Parents: 51c242b
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Fri May 4 17:31:02 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:06 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist_cnn.py | 116 ++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e4610e03/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
new file mode 100644
index 0000000..e488bac
--- /dev/null
+++ b/examples/autograd/mnist_cnn.py
@@ -0,0 +1,116 @@
+import numpy as np
+from singa import convolution_operation as layer_ops
+from singa import tensor
+from singa import autograd
+from singa import optimizer
+
+
+def load_data(path):
+    f = np.load(path)
+    x_train, y_train = f['x_train'], f['y_train']
+    x_test, y_test = f['x_test'], f['y_test']
+    f.close()
+    return (x_train, y_train), (x_test, y_test)
+
+def to_categorical(y, num_classes):
+    '''
+    Converts a class vector (integers) to binary class matrix.
+
+    Args
+        y: class vector to be converted into a matrix
+            (integers from 0 to num_classes).
+        num_classes: total number of classes.
+
+    Return
+        A binary matrix representation of the input.
+    '''
+    y = np.array(y, dtype='int')
+    n = y.shape[0]
+    categorical = np.zeros((n, num_classes))
+    categorical[np.arange(n), y] = 1
+    categorical=categorical.astype(np.float32)
+    return categorical
+
+def preprocess(data):
+    data=data.astype(np.float32)
+    data /= 255
+    data=np.expand_dims(data, axis=1)
+    return data
+
+def accuracy(pred,target):
+    y = np.argmax(pred, axis=1)
+    t = np.argmax(target, axis=1)
+    a = y == t
+    return np.array(a, 'int').sum() / float(len(t))
+
+
+if __name__ == '__main__':
+
+    batch_number=600
+    num_classes = 10
+    epochs = 1
+
+    sgd = optimizer.SGD(0.05)
+    #opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
+
+    train,test=load_data('/Users/wanqixue/Downloads/mnist.npz')
+    x_train=preprocess(train[0])
+    y_train = to_categorical(train[1], num_classes)
+
+    x_test=preprocess(test[0])
+    y_test=to_categorical(test[1],num_classes)
+    print 'the shape of training data is',x_train.shape
+    print 'the shape of training label is',y_train.shape
+    print 'the shape of testing data is', x_test.shape
+    print 'the shape of testing label is', y_test.shape
+
+
+    conv1=layer_ops.Convolution2D('conv1',32,3,1,border_mode='same')
+    conv2=layer_ops.Convolution2D('conv2',32,3,1,border_mode='same')
+
+    #operations can create when call
+    relu1=layer_ops.Activation('relu1')
+    relu2 = layer_ops.Activation('relu2')
+    pooling= layer_ops.MaxPooling2D('pooling',3,1,border_mode='same')
+    flatten=layer_ops.Flatten('flatten')
+    matmul=tensor.Matmul()
+    add_bias=tensor.AddBias()
+    softmax=tensor.SoftMax()
+    cross_entropy=tensor.CrossEntropy()
+    #avoid repeat create operations
+
+    w = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True) #package a dense layer to calculate the shape automatically
+    w.gaussian(0.0, 0.1)
+
+    b = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
+    b.set_value(0.0)
+
+    def forward(x,t):
+        y=conv1(x)[0]
+        y=relu1(y)[0]
+        y=conv2(y)[0]
+        y=relu2(y)[0]
+        y=pooling(y)[0]
+        y=flatten(y)[0]
+        y=matmul(y,w)[0]
+        y=add_bias(y,b)[0]
+        y=softmax(y)[0]
+        loss=cross_entropy(y,t)[0]
+        return loss, y
+
+    for epoch in range(epochs):
+        #for i in range(batch_number):
+        for i in range(50):
+            inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+            targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+            loss, y = forward(inputs,targets)
+
+            accuracy_rate = accuracy(tensor.ctensor2numpy(y.data),tensor.ctensor2numpy(targets.data))
+            if (i % 5 == 0):
+                print 'accuracy is:', accuracy_rate,'loss is:', tensor.ctensor2numpy(loss.data)[0]
+
+            in_grads = autograd.backward(loss)
+
+            for param in in_grads:
+                sgd.apply(0, in_grads[param], param, '')
+


[12/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

Clean up the code and add comments.
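After this clean-up the example relies on a module-level autograd.training flag and PyTorch-style constructors; a minimal sketch of the resulting API (channel counts and shapes are illustrative, and the full example applies soft-max before the loss):

from singa import tensor
from singa import autograd
from singa import optimizer

autograd.training = True              # replaces the per-call `flag` argument

conv = autograd.Conv2d(1, 8)          # (in_channels, out_channels), PyTorch style
linear = autograd.Linear(8 * 28 * 28, 10)
sgd = optimizer.SGD(0.05)

x = tensor.Tensor(shape=(4, 1, 28, 28))
x.gaussian(0.0, 1.0)
t = tensor.Tensor(shape=(4, 10))
t.gaussian(0.0, 0.1)

y = autograd.relu(conv(x))            # layer-backed op and functional op mixed freely
y = autograd.max_pool_2d(y)
y = autograd.flatten(y)
y = linear(y)
loss = autograd.cross_entropy(y, t)

in_grads = autograd.backward(loss)
for param in in_grads:
    sgd.apply(0, in_grads[param], param, '')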


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d7d629b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d7d629b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d7d629b

Branch: refs/heads/master
Commit: 6d7d629bf00437c23a67f1de3b1ed085764e9492
Parents: 403843d
Author: Wang Wei <dc...@nus.edu.sg>
Authored: Thu May 17 21:18:31 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist_cnn.py |  41 ++++----
 python/singa/autograd.py       | 200 +++++++++++++++++++-----------------
 2 files changed, 126 insertions(+), 115 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d7d629b/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 7afbb9e..3ab8b62 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -31,18 +31,18 @@ def to_categorical(y, num_classes):
     n = y.shape[0]
     categorical = np.zeros((n, num_classes))
     categorical[np.arange(n), y] = 1
-    categorical=categorical.astype(np.float32)
+    categorical = categorical.astype(np.float32)
     return categorical
 
 
 def preprocess(data):
-    data=data.astype(np.float32)
+    data = data.astype(np.float32)
     data /= 255
-    data=np.expand_dims(data, axis=1)
+    data = np.expand_dims(data, axis=1)
     return data
 
 
-def accuracy(pred,target):
+def accuracy(pred, target):
     y = np.argmax(pred, axis=1)
     t = np.argmax(target, axis=1)
     a = y == t
@@ -55,8 +55,9 @@ if __name__ == '__main__':
     parser.add_argument('file_path', type=str, help='the dataset path')
     args = parser.parse_args()
 
-    assert os.path.exists(args.file_path), 'Pls download the MNIST dataset from' \
-     'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
+    assert os.path.exists(args.file_path), \
+        'Pls download the MNIST dataset from ' \
+        'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
 
     train, test = load_data(args.file_path)
 
@@ -69,18 +70,17 @@ if __name__ == '__main__':
     x_train = preprocess(train[0])
     y_train = to_categorical(train[1], num_classes)
 
-    x_test=preprocess(test[0])
-    y_test=to_categorical(test[1],num_classes)
-    print ('the shape of training data is', x_train.shape)
-    print ('the shape of training label is', y_train.shape)
-    print ('the shape of testing data is', x_test.shape)
-    print ('the shape of testing label is', y_test.shape)
+    x_test = preprocess(test[0])
+    y_test = to_categorical(test[1], num_classes)
+    print('the shape of training data is', x_train.shape)
+    print('the shape of training label is', y_train.shape)
+    print('the shape of testing data is', x_test.shape)
+    print('the shape of testing label is', y_test.shape)
 
     # operations initialization
     conv1 = autograd.Conv2d(3, 32)
     conv2 = autograd.Conv2d(32, 32)
-    linear = autograd.Linear(32*28*28, 10)
-
+    linear = autograd.Linear(32 * 28 * 28, 10)
 
     def forward(x, t):
         y = conv1(x)
@@ -94,18 +94,21 @@ if __name__ == '__main__':
         loss = autograd.cross_entropy(y, t)
         return loss, y
 
+    autograd.training = True
     for epoch in range(epochs):
         for i in range(batch_number):
-            inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
-            targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+            inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :])
+            targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :])
 
             loss, y = forward(inputs, targets)
 
-            accuracy_rate = accuracy(autograd.ctensor2numpy(y.data),autograd.ctensor2numpy(targets.data))
+            accuracy_rate = accuracy(autograd.ctensor2numpy(
+                y.data), autograd.ctensor2numpy(targets.data))
             if (i % 5 == 0):
-                print('accuracy is:', accuracy_rate,'loss is:', autograd.ctensor2numpy(loss.data)[0])
+                print('accuracy is:', accuracy_rate, 'loss is:',
+                      autograd.ctensor2numpy(loss.data)[0])
 
             in_grads = autograd.backward(loss)
 
             for param in in_grads:
-                sgd.apply(0, in_grads[param], param, '')
\ No newline at end of file
+                sgd.apply(0, in_grads[param], param, '')

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d7d629b/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index de3dc92..fc438ab 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -1,18 +1,18 @@
 
 from __future__ import division
 
-from functools import reduce
 from collections import Counter, deque
-from .tensor import Tensor
+import numpy as np
+import math
 
-from singa import layer
+from .tensor import Tensor
+from . import layer
 from singa.proto import model_pb2
 from . import singa_wrap as singa
 
-import numpy as np
-import math
 
 CTensor = singa.Tensor
+training = False
 
 
 class Operation(object):
@@ -20,12 +20,16 @@ class Operation(object):
     An operation includes the forward and backward function of
     tensor calculation.
 
-    To add a specific operation Xxxx, subclass Operation and implement
-    forward() and backward(). Then implement a function xxxx which creates
-    a Xxxx instance and calls __call__ to do forward. The autograd engine
-    is able to do backward propagation by calling the backward() of Xxxx
-    automatically. Notice that the tensors are CTensor. NOT Python Tensor.
-    The arguments of forward() and backward() should only include CTensor args;
+    Steps to add a specific operation Xxxx:
+    1. create a subclass of Operation, name it as Xxxx
+    2. if Xxxx is implemented using other Operations, then override
+       _do_forward() function;
+       if Xxxx is implemented using CTensor operations,
+       then override the forward() and backward(); The arguments of forward()
+       and backward() should only include CTensor;
+       if Xxxx is implemented by calling functions in layer.py, then override
+       __call__(), forward() and backward(). TODO(wangwei) avoid this complex
+       case.
     '''
 
     def __call__(self, *xs):
@@ -103,6 +107,9 @@ class Operation(object):
         '''
         raise NotImplementedError
 
+    def get_params(self):
+        return []
+
 
 class Dummy(Operation):
     '''Dummy operation whice serves as a placehoder for autograd
@@ -119,10 +126,6 @@ class Dummy(Operation):
 
 
 class ReLU(Operation):
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        self.flag=flag
-        return self._do_forward(x)
 
     def forward(self, x):
         '''
@@ -132,7 +135,7 @@ class ReLU(Operation):
         Returns:
             a new CTensor whose element y = x if x >= 0; otherwise 0;
         '''
-        if self.flag:
+        if training:
             self.input = x
         return singa.ReLU(x)
 
@@ -154,10 +157,6 @@ def relu(x):
 
 class Matmul(Operation):
     '''For matrix multiplication'''
-    def __call__(self, x, w, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        self.flag=flag
-        return self._do_forward(x, w)
 
     def forward(self, x, w):
         '''Do forward propgation.
@@ -171,7 +170,7 @@ class Matmul(Operation):
         Returns:
             a CTensor for the result
         '''
-        if self.flag:
+        if training:
             self.input = (x, w)
         return singa.Mult(x, w)
 
@@ -187,13 +186,13 @@ class Matmul(Operation):
             singa.Mult(self.input[0].T(), dy)
 
 
-def matmul(x, w, flag=True):
-    return Matmul()(x, w, flag)[0]
+def matmul(x, w):
+    return Matmul()(x, w)[0]
 
 
 class AddBias(Operation):
     '''
-    Add Bias to each row / column of the Tensor, depending on the parameter axis.
+    Add Bias to each row / column of the Tensor, depending on the axis arg.
     '''
 
     def __init__(self, axis=0):
@@ -303,7 +302,8 @@ class CrossEntropy(Operation):
     def forward(self, x, t):
         '''
         Args:
-            x (CTensor): 1d or 2d tensor, the prediction data(output) of current network.
+            x (CTensor): 1d or 2d tensor, the prediction data(output)
+                         of current network.
             t (CTensor): 1d or 2d tensor, the target data for training.
 
         Returns:
@@ -320,12 +320,13 @@ class CrossEntropy(Operation):
     def backward(self, dy=1.0):
         '''
         Args:
-            dy (float or CTensor): scalar, accumulate gradient from outside of current network, usually
-            equal to 1.0
+            dy (float or CTensor): scalar, accumulate gradient from outside
+                                of current network, usually equal to 1.0
 
         Returns:
-            dx (CTensor): data for the dL /dx, L is the loss, x is the output of current network.
-            note that this is true for dy = 1.0
+            dx (CTensor): data for the dL /dx, L is the loss, x is the output
+                          of current network. note that this is true for
+                          dy = 1.0
         '''
         dx = singa.__div__(self.t, self.x)
         dx *= float(-1 / self.x.shape()[0])
@@ -351,17 +352,18 @@ def ctensor2numpy(x):
 
 
 class Conv2d(Operation):
-    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True,
-                 **kwargs):
+
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, **kwargs):
 
         inner_params = {'name': 'Conv2d',
-                          'border_mode': 'same',
-                          'cudnn_prefer': 'fastest',
-                          'workspace_byte_limit': 1024,
-                          'data_format': 'NCHW',
-                          'W_specs': {'init': 'xavier'},
-                          'b_specs': {'init': 'constant'},
-                          'input_sample_shape': None}
+                        'border_mode': 'same',
+                        'cudnn_prefer': 'fastest',
+                        'workspace_byte_limit': 1024,
+                        'data_format': 'NCHW',
+                        'W_specs': {'init': 'xavier'},
+                        'b_specs': {'init': 'constant'},
+                        'input_sample_shape': None}
         # TODO valid value of inner_params check
 
         for kwarg in kwargs:
@@ -369,7 +371,7 @@ class Conv2d(Operation):
                 raise TypeError('Keyword argument not understood:', kwarg)
             else:
                 inner_params[kwarg] = kwargs[kwarg]
-                
+
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.W_specs = inner_params['W_specs']
@@ -388,21 +390,30 @@ class Conv2d(Operation):
         if dilation != 1 or groups != 1:
             raise ValueError('Not implemented yet')
 
-        self.PyLayer = layer.Conv2D(inner_params['name'], nb_kernels=out_channels, kernel=kernel_size, stride=stride,
+        self.PyLayer = layer.Conv2D(inner_params['name'],
+                                    nb_kernels=out_channels,
+                                    kernel=kernel_size,
+                                    stride=stride,
                                     border_mode=inner_params['border_mode'],
-                 cudnn_prefer=inner_params['cudnn_prefer'], workspace_byte_limit=inner_params['workspace_byte_limit'],
-                 data_format=inner_params['data_format'], use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
-                 pad=pad, input_sample_shape=inner_params['input_sample_shape'])
+                                    cudnn_prefer=inner_params['cudnn_prefer'],
+                                    workspace_byte_limit=inner_params[
+                                        'workspace_byte_limit'],
+                                    data_format=inner_params['data_format'],
+                                    use_bias=bias,
+                                    W_specs=self.W_specs,
+                                    b_specs=self.b_specs,
+                                    pad=pad,
+                                    input_sample_shape=inner_params['input_sample_shape'])
 
     def get_params(self):
-        assert self.has_setup, \
-            'Must call setup() before get_params()'
-        params = self.PyLayer.layer.param_values()
-        return params
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
+        assert self.init_value is True, 'must initialize before get_params()'
+        if self.bias:
+            return (self.w, self.b)
+        else:
+            return self.w
+
+    def __call__(self, x):
+        if training:
             self.flag = model_pb2.kTrain
         else:
             self.flag = model_pb2.kEval
@@ -413,15 +424,18 @@ class Conv2d(Operation):
         param_data = self.PyLayer.layer.param_values()
 
         if not hasattr(self, 'w'):
-            self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
-            std = math.sqrt(2.0/(self.in_channels*self.kernel_size[0]*self.kernel_size[1]+self.out_channels))
+            self.w = Tensor(device=param_data[0].device, data=param_data[
+                            0], requires_grad=True, stores_grad=True)
+            std = math.sqrt(
+                2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] + self.out_channels))
             self.w.gaussian(0.0, std)
 
         xs = [x, self.w]
 
         if len(param_data) == 2:
             if not hasattr(self, 'b'):
-                self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
+                self.b = Tensor(device=param_data[1].device, data=param_data[
+                                1], requires_grad=True, stores_grad=True)
                 self.b.set_value(0.0)
 
             xs.append(self.b)
@@ -434,10 +448,11 @@ class Conv2d(Operation):
 
     def backward(self, dy):
         ret = self.PyLayer.layer.Backward(self.flag, dy)
-        return (ret[0],)+ret[1]
+        return (ret[0],) + ret[1]
 
 
 class Linear(Operation):
+
     def __init__(self, in_features, out_features, bias=True):
         self.in_features = in_features
         self.out_features = out_features
@@ -453,47 +468,36 @@ class Linear(Operation):
         else:
             return self.w
 
-    def init_params(self, w, b=None):
-        if self.bias:
-            assert b is not None, 'must initialize bias.'
-            assert w.shape == self.w_shape, 'shape of parameters must match.'
-            assert b.shape == self.b_shape, 'shape of parameters must match.'
-            self.w = w
-            self.b = b
-        else:
-            assert b is None, 'cannot initialize bias.'
-            assert w.shape == self.w_shape, 'shape of parameters must match.'
-            self.w = w
-        self.init_value = True
-        return
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
+    def __call__(self, x):
         if self.init_value is False:
-            self.w = Tensor(shape=self.w_shape, requires_grad=True, stores_grad=True)
+            self.w = Tensor(shape=self.w_shape,
+                            requires_grad=True, stores_grad=True)
             std = math.sqrt(2.0 / (self.in_features + self.out_features))
             self.w.gaussian(0.0, std)
             if self.bias:
-                self.b = Tensor(shape=self.b_shape, requires_grad=True, stores_grad=True)
+                self.b = Tensor(shape=self.b_shape,
+                                requires_grad=True, stores_grad=True)
                 self.b.set_value(0.0)
             self.init_value = True
-        y = matmul(x, self.w, flag)
+        y = matmul(x, self.w)
         if self.bias:
             y = add_bias(y, self.b, axis=0)
         return y
 
 
 class MaxPool2d(Operation):
-    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+
+    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1,
+                 return_indices=False, ceil_mode=False, **kwargs):
 
         inner_params = {'name': 'MaxPool2d',
-                          'border_mode': 'same',
-                          'data_format': 'NCHW',
-                          'input_sample_shape': None
-                          }
+                        'border_mode': 'same',
+                        'data_format': 'NCHW',
+                        'input_sample_shape': None
+                        }
 
         for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
+            if kwarg not in inner_params:
                 raise TypeError('Keyword argument not understood:', kwarg)
             else:
                 inner_params[kwarg] = kwargs[kwarg]
@@ -503,16 +507,18 @@ class MaxPool2d(Operation):
         else:
             pad = padding
 
-        if dilation != 1 or return_indices is not False or ceil_mode is not False:
+        if dilation != 1 or return_indices or ceil_mode:
             raise ValueError('Not implemented yet')
 
-        self.PyLayer = layer.Pooling2D(inner_params['name'], model_pb2.PoolingConf.MAX,
-                                           kernel_size, stride, inner_params['border_mode'],
-                                           pad, inner_params['data_format'], inner_params['input_sample_shape'])
+        self.PyLayer = layer.Pooling2D(inner_params['name'],
+                                       model_pb2.PoolingConf.MAX,
+                                       kernel_size, stride, inner_params[
+                                           'border_mode'],
+                                       pad, inner_params['data_format'],
+                                       inner_params['input_sample_shape'])
 
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
+    def __call__(self, x):
+        if training:
             self.flag = model_pb2.kTrain
         else:
             self.flag = model_pb2.kEval
@@ -529,17 +535,19 @@ class MaxPool2d(Operation):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
 
-def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
-    return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
+def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1,
+                return_indices=False, ceil_mode=False, **kwargs):
+    return MaxPool2d(kernel_size, stride, padding, dilation, return_indices,
+                     ceil_mode, **kwargs)(x)[0]
 
 
 class Flatten(Operation):
-    def __init__(self, name='Flatten', axis=1, input_sample_shape=None):
-        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
 
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
+    def __init__(self):
+        self.PyLayer = layer.Flatten('flatten', 1)
+
+    def __call__(self, x):
+        if training:
             self.flag = model_pb2.kTrain
         else:
             self.flag = model_pb2.kEval
@@ -554,8 +562,8 @@ class Flatten(Operation):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
 
-def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
-    return Flatten(name, axis, input_sample_shape)(x)[0]
+def flatten(x):
+    return Flatten()(x)[0]
 
 
 def infer_dependency(op):


[03/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd 

1. Realize a simple convolutional network based on autograd for test use.
2. The code runs on my machine; the training effect is clear and the network parameters are interpretable.
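A compact, self-contained restatement of the two preprocessing helpers defined in the example below, with dummy data (the array contents are illustrative):

import numpy as np

def to_categorical(y, num_classes):
    # one-hot encode an integer label vector, as float32
    y = np.asarray(y, dtype='int')
    out = np.zeros((y.shape[0], num_classes), dtype=np.float32)
    out[np.arange(y.shape[0]), y] = 1
    return out

def preprocess(data):
    # scale pixel values to [0, 1] and add the channel axis expected by NCHW layers
    data = data.astype(np.float32) / 255
    return np.expand_dims(data, axis=1)

labels = np.array([3, 0, 7, 1])
images = np.random.randint(0, 256, size=(4, 28, 28))
print(to_categorical(labels, 10).shape)   # (4, 10)
print(preprocess(images).shape)           # (4, 1, 28, 28)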

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d619e44e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d619e44e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d619e44e

Branch: refs/heads/master
Commit: d619e44e0700abc163d71b42efd6c1b35b58d7bb
Parents: 6bcd5d0
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Fri May 4 17:28:42 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:06 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist.py | 116 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d619e44e/examples/autograd/mnist.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist.py b/examples/autograd/mnist.py
new file mode 100644
index 0000000..e488bac
--- /dev/null
+++ b/examples/autograd/mnist.py
@@ -0,0 +1,116 @@
+import numpy as np
+from singa import convolution_operation as layer_ops
+from singa import tensor
+from singa import autograd
+from singa import optimizer
+
+
+def load_data(path):
+    f = np.load(path)
+    x_train, y_train = f['x_train'], f['y_train']
+    x_test, y_test = f['x_test'], f['y_test']
+    f.close()
+    return (x_train, y_train), (x_test, y_test)
+
+def to_categorical(y, num_classes):
+    '''
+    Converts a class vector (integers) to binary class matrix.
+
+    Args
+        y: class vector to be converted into a matrix
+            (integers from 0 to num_classes).
+        num_classes: total number of classes.
+
+    Return
+        A binary matrix representation of the input.
+    '''
+    y = np.array(y, dtype='int')
+    n = y.shape[0]
+    categorical = np.zeros((n, num_classes))
+    categorical[np.arange(n), y] = 1
+    categorical=categorical.astype(np.float32)
+    return categorical
+
+def preprocess(data):
+    data=data.astype(np.float32)
+    data /= 255
+    data=np.expand_dims(data, axis=1)
+    return data
+
+def accuracy(pred,target):
+    y = np.argmax(pred, axis=1)
+    t = np.argmax(target, axis=1)
+    a = y == t
+    return np.array(a, 'int').sum() / float(len(t))
+
+
+if __name__ == '__main__':
+
+    batch_number=600
+    num_classes = 10
+    epochs = 1
+
+    sgd = optimizer.SGD(0.05)
+    #opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
+
+    train,test=load_data('/Users/wanqixue/Downloads/mnist.npz')
+    x_train=preprocess(train[0])
+    y_train = to_categorical(train[1], num_classes)
+
+    x_test=preprocess(test[0])
+    y_test=to_categorical(test[1],num_classes)
+    print 'the shape of training data is',x_train.shape
+    print 'the shape of training label is',y_train.shape
+    print 'the shape of testing data is', x_test.shape
+    print 'the shape of testing label is', y_test.shape
+
+
+    conv1=layer_ops.Convolution2D('conv1',32,3,1,border_mode='same')
+    conv2=layer_ops.Convolution2D('conv2',32,3,1,border_mode='same')
+
+    #operations can create when call
+    relu1=layer_ops.Activation('relu1')
+    relu2 = layer_ops.Activation('relu2')
+    pooling= layer_ops.MaxPooling2D('pooling',3,1,border_mode='same')
+    flatten=layer_ops.Flatten('flatten')
+    matmul=tensor.Matmul()
+    add_bias=tensor.AddBias()
+    softmax=tensor.SoftMax()
+    cross_entropy=tensor.CrossEntropy()
+    #avoid repeat create operations
+
+    w = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True) #package a dense layer to calculate the shape automatically
+    w.gaussian(0.0, 0.1)
+
+    b = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
+    b.set_value(0.0)
+
+    def forward(x,t):
+        y=conv1(x)[0]
+        y=relu1(y)[0]
+        y=conv2(y)[0]
+        y=relu2(y)[0]
+        y=pooling(y)[0]
+        y=flatten(y)[0]
+        y=matmul(y,w)[0]
+        y=add_bias(y,b)[0]
+        y=softmax(y)[0]
+        loss=cross_entropy(y,t)[0]
+        return loss, y
+
+    for epoch in range(epochs):
+        #for i in range(batch_number):
+        for i in range(50):
+            inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+            targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+            loss, y = forward(inputs,targets)
+
+            accuracy_rate = accuracy(tensor.ctensor2numpy(y.data),tensor.ctensor2numpy(targets.data))
+            if (i % 5 == 0):
+                print 'accuracy is:', accuracy_rate,'loss is:', tensor.ctensor2numpy(loss.data)[0]
+
+            in_grads = autograd.backward(loss)
+
+            for param in in_grads:
+                sgd.apply(0, in_grads[param], param, '')
+


[06/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

Modified the design of the convolution operation to make it trainable.
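The core of the change, as a toy sketch in plain Python (no SINGA API; CachedOp is an illustrative name): the parameter is created once and cached on the operation, so the optimizer keeps updating the same object across iterations, instead of a fresh tensor being recreated on every call and the update being lost.

# toy illustration of why caching the parameter makes the operation trainable
class CachedOp(object):
    def __call__(self, x):
        if not hasattr(self, 'w'):   # create the parameter only on the first call
            self.w = 0.0
        return x + self.w            # later calls reuse (and therefore train) self.w

op = CachedOp()
op(1.0)
op.w += 0.5                          # an "optimizer" update survives into the next call
assert op(1.0) == 1.5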


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/51c242b5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/51c242b5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/51c242b5

Branch: refs/heads/master
Commit: 51c242b5fbaeddb48b49a325e814608ff9c1a10d
Parents: 195b4d4
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Tue May 1 21:16:06 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:06 2018 +0800

----------------------------------------------------------------------
 python/singa/convolution_operation.py | 57 ++++++++++--------------------
 1 file changed, 19 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/51c242b5/python/singa/convolution_operation.py
----------------------------------------------------------------------
diff --git a/python/singa/convolution_operation.py b/python/singa/convolution_operation.py
index dcab2d4..8475c21 100644
--- a/python/singa/convolution_operation.py
+++ b/python/singa/convolution_operation.py
@@ -34,20 +34,24 @@ class Convolution2D(tensor.Operation):
                  data_format=data_format, use_bias=use_bias, W_specs=W_specs, b_specs=b_specs,
                  pad=pad, input_sample_shape=input_sample_shape)
 
+
     def __call__(self, x):
         if not self.PyLayer.has_setup:
             self.PyLayer.setup(x.shape[1:])
         param_data = self.PyLayer.layer.param_values()
+
+        if not hasattr(self, 'w'):
+            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            self.w.gaussian(0.0, 0.1)  # TODO realize other initialization method according to W_specs
+
+        xs = [x, self.w]
+
         if len(param_data) == 2:
-            w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-            w.gaussian(0.0, 0.1)
-            b.set_value(0.0)
-            xs = tuple([x, w, b])
-        else:
-            w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            w.gaussian(0.0, 0.1)
-            xs = tuple([x, w])
+            self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+            self.b.set_value(0.0)  # TODO realize other initialization method according to b_specs
+            xs.append(self.b)
+
+        xs = tuple(xs)
         return self._do_forward(*xs)
 
     def forward(self, *xs):
@@ -58,14 +62,6 @@ class Convolution2D(tensor.Operation):
         return (ret[0],)+ret[1]
 
 
-x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
-x.gaussian(1,0.0)
-layer_1= Convolution2D('conv1',4)
-y= layer_1(x)[0]
-z= layer_1._do_backward(y.data)
-a=ctensor2numpy(y.data)
-
-
 class MaxPooling2D(tensor.Operation):
     def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
                  data_format='NCHW', input_sample_shape=None):
@@ -85,13 +81,6 @@ class MaxPooling2D(tensor.Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(True, dy)[0]   # how backward() return?
 
-x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
-x.gaussian(1,0.0)
-layer_1= MaxPooling2D('pooling1')
-y= layer_1(x)[0]
-z= layer_1._do_backward(y.data)
-a=ctensor2numpy(y.data)
-
 
 class Activation(tensor.Operation):
     def __init__(self,name, mode='relu',input_sample_shape=None):
@@ -109,14 +98,6 @@ class Activation(tensor.Operation):
         return self.PyLayer.layer.Backward(True, dy)[0]
 
 
-x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
-x.gaussian(-1,0.0)
-layer_1= Activation('relu1')
-y= layer_1(x)[0]
-z= layer_1._do_backward(y.data)
-a=ctensor2numpy(y.data)
-
-
 class Flatten(tensor.Operation):
     def __init__(self, name, axis=1, input_sample_shape=None):
         self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
@@ -132,12 +113,12 @@ class Flatten(tensor.Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(True, dy)[0]
 
-x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
-x.gaussian(-1,0.0)
-layer_1= Flatten('flatten')
-y= layer_1(x)[0]
-z= layer_1._do_backward(y.data)
-a=ctensor2numpy(y.data)
+
+class Dense(tensor.Operation):
+    '''
+    Need to implemented?
+    '''
+    pass
 
 
 inputs=tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)


[02/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Cascade the newly generated layer operations with existing operations such as matmul and softmax to test compatibility.

2. Test the autograd engine on these newly developed operations to confirm that they work.
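The appended test below chains the new operations with the existing tensor operations; condensed, the pattern is as follows (shapes follow the diff, and each layer operation's __call__ returns a tuple, hence the [0]):

from singa import tensor
from singa import autograd
from singa import convolution_operation as ops

x = tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)
x.gaussian(1, 0)

y = ops.Convolution2D('conv', 4)(x)[0]     # new, layer-backed operation
y = ops.Flatten('flatten')(y)[0]

w = tensor.Tensor(shape=(y.shape[1], 10), requires_grad=True, stores_grad=True)
w.gaussian(0.0, 0.1)
y = tensor.softmax(tensor.matmul(y, w))    # existing tensor operations

t = tensor.Tensor(shape=(10, 10), requires_grad=False, stores_grad=False)
t.gaussian(0.0, 0.1)
loss = tensor.cross_entropy(y, t)
print(autograd.backward(loss))             # gradients for the whole mixed graph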


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/195b4d42
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/195b4d42
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/195b4d42

Branch: refs/heads/master
Commit: 195b4d42151b94d163d5c377c41246012a229bae
Parents: a9d495a
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Mon Apr 30 11:55:07 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:06 2018 +0800

----------------------------------------------------------------------
 python/singa/convolution_operation.py | 38 +++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/195b4d42/python/singa/convolution_operation.py
----------------------------------------------------------------------
diff --git a/python/singa/convolution_operation.py b/python/singa/convolution_operation.py
index 7c42dce..dcab2d4 100644
--- a/python/singa/convolution_operation.py
+++ b/python/singa/convolution_operation.py
@@ -1,6 +1,8 @@
 from singa import tensor
 from singa import layer
 from singa.proto import model_pb2
+from singa import autograd
+
 
 
 def ctensor2numpy(x):
@@ -64,8 +66,6 @@ z= layer_1._do_backward(y.data)
 a=ctensor2numpy(y.data)
 
 
-
-
 class MaxPooling2D(tensor.Operation):
     def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
                  data_format='NCHW', input_sample_shape=None):
@@ -123,7 +123,7 @@ class Flatten(tensor.Operation):
 
     def __call__(self, x):
         if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape)
+            self.PyLayer.setup(x.shape[1:])
         return self._do_forward(x)
 
     def forward(self, x):
@@ -140,6 +140,38 @@ z= layer_1._do_backward(y.data)
 a=ctensor2numpy(y.data)
 
 
+inputs=tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)
+inputs.gaussian(1,0)
+
+x = Convolution2D('conv',4)(inputs)[0]
+print(x.shape)
+
+x = MaxPooling2D('pooling')(x)[0]
+print(x.shape)
+
+x = Activation('relu')(x)[0]
+print(x.shape)
+
+x = Flatten('flatten')(x)[0]
+print(x.shape)
+
+w0 = tensor.Tensor(shape=(4, 10), requires_grad=True, stores_grad=True)
+w0.gaussian(0.0, 0.1)
+x = tensor.matmul(x, w0)
+print(x.shape)
+
+x = tensor.softmax(x)
+
+target=tensor.Tensor(shape=(10, 10), requires_grad=False, stores_grad=False)
+target.gaussian(0.0 ,0.1)
+loss = tensor.cross_entropy(x, target)
+
+grad=autograd.backward(loss)
+print(grad)
+
+
+
+
 
 
 


[04/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Implement some layer operations by wrapping PyLayer. These layers are runnable.

2. Move layer_ops to python/singa.
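The wrapping pattern used throughout the new file below, shown for the simplest operation as a sketch (the hard-coded flag values mirror this commit and are revisited in later commits):

from singa import tensor
from singa import layer

class Activation(tensor.Operation):
    def __init__(self, name, mode='relu', input_sample_shape=None):
        # hold an ordinary SINGA layer and delegate the real work to it
        self.PyLayer = layer.Activation(name, mode, input_sample_shape)

    def __call__(self, x):
        if not self.PyLayer.has_setup:
            self.PyLayer.setup(x.shape[1:])   # set up lazily from the per-sample shape
        return self._do_forward(x)

    def forward(self, x):
        return self.PyLayer.layer.Forward(4, x)

    def backward(self, dy):
        return self.PyLayer.layer.Backward(True, dy)[0]

# usage: the Operation machinery returns a tuple of outputs
x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
x.gaussian(-1, 0.0)
y = Activation('relu1')(x)[0]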


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a9d495a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a9d495a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a9d495a6

Branch: refs/heads/master
Commit: a9d495a6b8998b4453772b3789b4e615f8365f7e
Parents: d619e44
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Fri Apr 27 11:18:00 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:06 2018 +0800

----------------------------------------------------------------------
 python/singa/convolution_operation.py | 145 +++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a9d495a6/python/singa/convolution_operation.py
----------------------------------------------------------------------
diff --git a/python/singa/convolution_operation.py b/python/singa/convolution_operation.py
new file mode 100644
index 0000000..7c42dce
--- /dev/null
+++ b/python/singa/convolution_operation.py
@@ -0,0 +1,145 @@
+from singa import tensor
+from singa import layer
+from singa.proto import model_pb2
+
+
+def ctensor2numpy(x):
+    '''
+    // For test use.
+
+
+    To be used in SoftMax Operation.
+    Convert a singa_tensor to numpy_tensor.
+    '''
+    np_array = x.GetFloatValue(int(x.Size()))
+    return np_array.reshape(x.shape())
+
+class Convolution2D(tensor.Operation):
+    def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
+                 cudnn_prefer='fastest', workspace_byte_limit=1024,
+                 data_format='NCHW', use_bias=True, W_specs=None, b_specs=None,
+                 pad=None,input_sample_shape=None):
+        '''
+        How to match Keras:
+
+        in Keras conv2d, self.kernel record how to generate kernel (shape,initializer,name,regularizer,constraint),
+        it can be interpret to
+        shape -> kernel+input_sample_shape[0](nb_channels)+nb_kernels,
+        initializer, name, regularizer, constraint -> W_specs.
+        '''
+        self.PyLayer = layer.Conv2D(name, nb_kernels, kernel=kernel, stride=stride, border_mode=border_mode,
+                 cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
+                 data_format=data_format, use_bias=use_bias, W_specs=W_specs, b_specs=b_specs,
+                 pad=pad, input_sample_shape=input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        param_data = self.PyLayer.layer.param_values()
+        if len(param_data) == 2:
+            w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 0.1)
+            b.set_value(0.0)
+            xs = tuple([x, w, b])
+        else:
+            w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 0.1)
+            xs = tuple([x, w])
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(4, xs[0])  #how ktrain works?  flag & ktrain.
+
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(True, dy)
+        return (ret[0],)+ret[1]
+
+
+x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
+x.gaussian(1,0.0)
+layer_1= Convolution2D('conv1',4)
+y= layer_1(x)[0]
+z= layer_1._do_backward(y.data)
+a=ctensor2numpy(y.data)
+
+
+
+
+class MaxPooling2D(tensor.Operation):
+    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
+                 data_format='NCHW', input_sample_shape=None):
+
+        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
+                                           kernel, stride, border_mode,
+                                           pad, data_format, input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, x):
+        return self.PyLayer.layer.Forward(4, x)
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(True, dy)[0]   # how backward() return?
+
+x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
+x.gaussian(1,0.0)
+layer_1= MaxPooling2D('pooling1')
+y= layer_1(x)[0]
+z= layer_1._do_backward(y.data)
+a=ctensor2numpy(y.data)
+
+
+class Activation(tensor.Operation):
+    def __init__(self,name, mode='relu',input_sample_shape=None):
+        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, x):
+        return self.PyLayer.layer.Forward(4, x)
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(True, dy)[0]
+
+
+x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
+x.gaussian(-1,0.0)
+layer_1= Activation('relu1')
+y= layer_1(x)[0]
+z= layer_1._do_backward(y.data)
+a=ctensor2numpy(y.data)
+
+
+class Flatten(tensor.Operation):
+    def __init__(self, name, axis=1, input_sample_shape=None):
+        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
+
+    def __call__(self, x):
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape)
+        return self._do_forward(x)
+
+    def forward(self, x):
+        return self.PyLayer.layer.Forward(4, x)
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(True, dy)[0]
+
+x = tensor.Tensor(shape=(1, 1, 3, 3), requires_grad=True, stores_grad=True)
+x.gaussian(-1,0.0)
+layer_1= Flatten('flatten')
+y= layer_1(x)[0]
+z= layer_1._do_backward(y.data)
+a=ctensor2numpy(y.data)
+
+
+
+
+


[14/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Modified the Linear operation.

2. Reorganized the code: moved Operation and Dummy from tensor.py to autograd.py.
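A usage sketch of the reworked Linear below: __init__ only records the parameter shapes, the Tensors themselves are created and initialised lazily on the first call (or supplied explicitly through init_params), and get_params() is valid only after that (the input shape here is illustrative):

from singa import tensor
from singa import autograd

lin = autograd.Linear(20, 10)     # records w_shape=(20, 10) and b_shape=(1, 10); no Tensors yet

x = tensor.Tensor(shape=(4, 20), requires_grad=False, stores_grad=False)
x.gaussian(0.0, 1.0)

y = lin(x)                        # first call creates and initialises self.w / self.b
w, b = lin.get_params()           # only valid once the parameters exist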


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/403843d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/403843d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/403843d8

Branch: refs/heads/master
Commit: 403843d820cc7ae31513436d7f314e99d98cf244
Parents: 40e609a
Author: xuewanqi <xu...@u.nus.edu>
Authored: Wed May 16 13:09:23 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 python/singa/autograd.py | 142 +++++++++++++++++++++++++++++++++++++-----
 python/singa/tensor.py   | 106 +------------------------------
 2 files changed, 131 insertions(+), 117 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/403843d8/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index daae43c..de3dc92 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -3,7 +3,7 @@ from __future__ import division
 
 from functools import reduce
 from collections import Counter, deque
-from .tensor import Tensor, Dummy, Operation
+from .tensor import Tensor
 
 from singa import layer
 from singa.proto import model_pb2
@@ -15,6 +15,109 @@ import math
 CTensor = singa.Tensor
 
 
+class Operation(object):
+    '''
+    An operation includes the forward and backward function of
+    tensor calculation.
+
+    To add a specific operation Xxxx, subclass Operation and implement
+    forward() and backward(). Then implement a function xxxx which creates
+    a Xxxx instance and calls __call__ to do forward. The autograd engine
+    is able to do backward propagation by calling the backward() of Xxxx
+    automatically. Notice that the tensors are CTensor. NOT Python Tensor.
+    The arguments of forward() and backward() should only include CTensor args;
+    '''
+
+    def __call__(self, *xs):
+        return self._do_forward(*xs)
+
+    def _do_forward(self, *xs):
+        '''
+        Do not call this function from user code. It is called by __call__().
+
+        Args:
+            xs, Tensor instance(s)
+
+        Returns:
+            Tensor instance(s)
+        '''
+        # TODO add the pre hook
+        assert all([isinstance(x, Tensor) for x in xs]), \
+            'xs should include only Tensor instances'
+
+        # need to do backward if any of its input arg needs gradient
+        self.requires_grad = any([x.requires_grad for x in xs])
+
+        self.src = []
+        for x in xs:
+            if x.stores_grad:
+                # store the tensor whose gradient needs be returned in
+                # backward(), e.g. if x is parameter
+                self.src.append((x.creator, id(x), x, x.stores_grad))
+            else:
+                # for intermediate tensors, they will be released soon;
+                # no need to store them --> use None
+                self.src.append((x.creator, id(x), None, x.stores_grad))
+
+        # get the CTensor (data) if the input arg is Tensor
+        xs = tuple(x.data for x in xs)
+        ys = self.forward(*xs)
+        if not isinstance(ys, tuple):
+            ys = (ys,)
+        # create Tensor based on CTensor(data);
+        # assume outputs are all Tensor instances
+        ys = tuple(Tensor(device=y.device,
+                          data=y,
+                          requires_grad=self.requires_grad,
+                          creator=self) for y in ys)
+        # map from python id to output index
+        self.y_id2idx = {id(y): i for i, y in enumerate(ys)}
+        # TODO add the post hook
+        return ys
+
+    def _do_backward(self, *dys):
+        dxs = self.backward(*dys)
+        if not isinstance(dxs, tuple):
+            dxs = (dxs,)
+        return dxs
+
+    def forward(self, *xs):
+        '''Forward propagation.
+
+        Args:
+            xs: input args consisting of only CTensors.
+
+        Returns:
+            CTensor instance(s)
+        '''
+        raise NotImplementedError
+
+    def backward(self, *dys):
+        ''' Backward propagation.
+
+        Args:
+            dys: input args consisting of only CTensors.
+
+        Returns:
+            CTensor instance(s)
+        '''
+        raise NotImplementedError
+
+
+class Dummy(Operation):
+    '''Dummy operation which serves as a placeholder for autograd
+
+    Args:
+        name(string): set it for debug
+    '''
+
+    def __init__(self, tensor, name=None):
+        self.name = name
+        self.src = []
+        self.y_id2idx = {id(tensor): 0}
+        self.requires_grad = False
+
+
 class ReLU(Operation):
     def __call__(self, x, flag=True):
         assert type(flag) is bool, 'flag can only be bool.'
@@ -338,33 +441,44 @@ class Linear(Operation):
     def __init__(self, in_features, out_features, bias=True):
         self.in_features = in_features
         self.out_features = out_features
-        w_shape = (in_features, out_features)
-        self.w = Tensor(shape=w_shape, requires_grad=True, stores_grad=True)
-        if bias:
-            b_shape = (1, out_features)
-            self.b = Tensor(shape=b_shape, requires_grad=True, stores_grad=True)
+        self.w_shape = (in_features, out_features)
+        self.b_shape = (1, out_features)
+        self.bias = bias
         self.init_value = False
 
     def get_params(self):
-        if hasattr(self, 'b'):
+        assert self.init_value is True, 'must initialize before get_params()'
+        if self.bias:
             return (self.w, self.b)
         else:
             return self.w
 
+    def init_params(self, w, b=None):
+        if self.bias:
+            assert b is not None, 'must initialize bias.'
+            assert w.shape == self.w_shape, 'shape of parameters must match.'
+            assert b.shape == self.b_shape, 'shape of parameters must match.'
+            self.w = w
+            self.b = b
+        else:
+            assert b is None, 'cannot initialize bias.'
+            assert w.shape == self.w_shape, 'shape of parameters must match.'
+            self.w = w
+        self.init_value = True
+        return
+
     def __call__(self, x, flag=True):
         assert type(flag) is bool, 'flag can only be bool.'
-        self.flag = flag
         if self.init_value is False:
+            self.w = Tensor(shape=self.w_shape, requires_grad=True, stores_grad=True)
             std = math.sqrt(2.0 / (self.in_features + self.out_features))
             self.w.gaussian(0.0, std)
-            if hasattr(self, 'b'):
+            if self.bias:
+                self.b = Tensor(shape=self.b_shape, requires_grad=True, stores_grad=True)
                 self.b.set_value(0.0)
             self.init_value = True
-        return self._do_forward(x)
-
-    def _do_forward(self, x):
-        y = matmul(x, self.w, self.flag)
-        if hasattr(self, 'b'):
+        y = matmul(x, self.w, flag)
+        if self.bias:
             y = add_bias(y, self.b, axis=0)
         return y
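
The Operation docstring added above spells out the extension protocol: subclass
Operation, implement forward()/backward() over CTensors, and expose a small
functional wrapper that calls __call__. A minimal sketch of that pattern,
written as a stripped-down restatement of the existing Matmul operation
(MyMatmul and my_matmul are hypothetical names used only for illustration):

    from singa import singa_wrap as singa
    from singa.autograd import Operation


    class MyMatmul(Operation):
        '''Illustration only: same maths as the built-in Matmul.'''

        def forward(self, x, w):
            # x and w arrive as CTensors; cache them for the backward pass
            self.input = (x, w)
            return singa.Mult(x, w)

        def backward(self, dy):
            # dy is dL/dy as a CTensor; return (dL/dx, dL/dw),
            # in the same order as forward()'s arguments
            return singa.Mult(dy, self.input[1].T()), \
                singa.Mult(self.input[0].T(), dy)


    def my_matmul(x, w):
        # x and w are Python Tensor wrappers here; __call__/_do_forward
        # unwrap them to CTensors, call forward(), wrap the outputs back,
        # and register this MyMatmul instance as the creator of the result
        return MyMatmul()(x, w)[0]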
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/403843d8/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 117779e..6abc289 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -65,6 +65,7 @@ from .proto import core_pb2
 from . import singa_wrap as singa
 from .device import get_default_device
 
+
 int32 = core_pb2.kInt
 float32 = core_pb2.kFloat32
 CTensor = singa.Tensor
@@ -107,7 +108,8 @@ class Tensor(object):
         self.requires_grad = requires_grad
         self.stores_grad = stores_grad
         if creator is None:
-            self.creator = Dummy(self)
+            from . import autograd
+            self.creator = autograd.Dummy(self)
         else:
             self.creator = creator
 
@@ -1254,105 +1256,3 @@ def copy_from_numpy(data, np_array):
     else:
         print('Not implemented yet for ', dt)
 
-
-class Operation(object):
-    '''
-    An operation includes the forward and backward function of
-    tensor calculation.
-
-    To add a specific operation Xxxx, subclass Operation and implement
-    forward() and backward(). Then implement a function xxxx which creates
-    a Xxxx instance and calls __call__ to do forward. The autograd engine
-    is able to do backward propagation by calling the backward() of Xxxx
-    automatically. Notice that the tensors are CTensor. NOT Python Tensor.
-    The arguments of forward() and backward() should only include CTensor args; 
-    '''
-
-    def __call__(self, *xs):
-        return self._do_forward(*xs)
-
-    def _do_forward(self, *xs):
-        '''
-        Do not call this function from user code. It is called by __call__().
-
-        Args:
-            xs, Tensor instance(s)
-
-        Returns:
-            Tensor instance(s)
-        '''
-        # TODO add the pre hook
-        assert all([isinstance(x, Tensor) for x in xs]), \
-            'xs should include only Tensor instances'
-
-        # need to do backward if any of its input arg needs gradient
-        self.requires_grad = any([x.requires_grad for x in xs])
-
-        self.src = []
-        for x in xs:
-            if x.stores_grad:
-                # store the tensor whose gradient needs be returned in
-                # backward(), e.g. if x is parameter
-                self.src.append((x.creator, id(x), x, x.stores_grad))
-            else:
-                # for intermediate tensors, they will be released soon;
-                # no need to store them --> use None
-                self.src.append((x.creator, id(x), None, x.stores_grad))
-
-        # get the CTensor (data) if the input arg is Tensor
-        xs = tuple(x.data for x in xs)
-        ys = self.forward(*xs)
-        if not isinstance(ys, tuple):
-            ys = (ys,)
-        # create Tensor based on CTensor(data);
-        # assume outputs are all Tensor instances
-        ys = tuple(Tensor(device=y.device,
-                          data=y,
-                          requires_grad=self.requires_grad,
-                          creator=self) for y in ys)
-        # map from python id to output index
-        self.y_id2idx = {id(y): i for i, y in enumerate(ys)}
-        # TODO add the post hook
-        return ys
-
-    def _do_backward(self, *dys):
-        dxs = self.backward(*dys)
-        if not isinstance(dxs, tuple):
-            dxs = (dxs,)
-        return dxs
-
-    def forward(self, *xs):
-        '''Forward propagation.
-
-        Args:
-            xs: input args consisting of only CTensors.
-
-        Returns:
-            CTensor instance(s)
-        '''
-        raise NotImplementedError
-
-    def backward(self, *dys):
-        ''' Backward propagation.
-
-        Args:
-            dys: input args consisting of only CTensors.
-
-        Returns:
-            CTensor instance(s)
-        '''
-        raise NotImplementedError
-
-
-class Dummy(Operation):
-    '''Dummy operation whice serves as a placehoder for autograd
-
-    Args:
-        name(string): set it for debug
-    '''
-
-    def __init__(self, tensor, name=None):
-        self.name = name
-        self.src = []
-        self.y_id2idx = {id(tensor): 0}
-        self.requires_grad = False
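
With the revised Linear above, parameters can either be created lazily on the
first call (Xavier-scaled Gaussian for w, zeros for b) or supplied up front
through init_params(). A rough usage sketch under that reading (fc, fc2, x, w0
and b0 are hypothetical names):

    from singa import tensor
    from singa import autograd

    # lazy initialization: w (and b) are created on the first forward call
    fc = autograd.Linear(100, 10)
    x = tensor.Tensor(shape=(4, 100), requires_grad=False, stores_grad=False)
    x.gaussian(0.0, 1.0)
    y = fc(x)                 # triggers Xavier-style init of w, zeros for b
    w, b = fc.get_params()    # only valid after the parameters exist

    # explicit initialization: shapes must match (in_features, out_features)
    # and (1, out_features)
    w0 = tensor.Tensor(shape=(100, 10), requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, 0.1)
    b0 = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
    b0.set_value(0.0)
    fc2 = autograd.Linear(100, 10)
    fc2.init_params(w0, b0)
    y2 = fc2(x, flag=True)    # flag=True marks the training phase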


[09/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Add the Xavier initialization method.

2. Package the matmul and add_bias operations into a dense() helper (see the sketch below).

3. Update the examples accordingly.

4. Remove the unfriendly [0] indexing from the operation API.
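
In other words, dense() is just matmul followed by add_bias, and the Xavier
method scales the Gaussian std by the fan-in/fan-out sum. A hedged sketch of
the equivalent hand-written setup (in_features, out_features, w0 and b0 are
hypothetical):

    import math
    from singa import tensor
    from singa import autograd

    in_features, out_features = 784, 10

    # item 1: Xavier-style std for the weight initialization
    std = math.sqrt(2.0 / (in_features + out_features))
    w0 = tensor.Tensor(shape=(in_features, out_features),
                       requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, std)
    b0 = tensor.Tensor(shape=(1, out_features),
                       requires_grad=True, stores_grad=True)
    b0.set_value(0.0)

    # item 2: for an input Tensor x,
    #     y = autograd.dense(x, w0, b0)
    # is equivalent to
    #     y = autograd.add_bias(autograd.matmul(x, w0), b0, axis=0)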


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ed464efe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ed464efe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ed464efe

Branch: refs/heads/master
Commit: ed464efe6dbb412f7f1d4ae6e86c9db82acee92d
Parents: 8146852
Author: xuewanqi <xu...@u.nus.edu>
Authored: Fri May 11 15:09:22 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mlp.py       |  14 ++---
 examples/autograd/mnist_cnn.py |  35 ++++++-------
 python/singa/autograd.py       | 101 +++++++++++++++++++++++-------------
 python/singa/tensor.py         |   2 +-
 4 files changed, 89 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/examples/autograd/mlp.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mlp.py b/examples/autograd/mlp.py
index 7352c21..0e42d63 100644
--- a/examples/autograd/mlp.py
+++ b/examples/autograd/mlp.py
@@ -58,13 +58,13 @@ if __name__ == '__main__':
     sgd = optimizer.SGD(0.05)
     # training process
     for i in range(1001):
-        x = tensor.matmul(inputs, w0)
-        x = tensor.add_bias(x, b0)
-        x = tensor.relu(x)
-        x = tensor.matmul(x, w1)
-        x = tensor.add_bias(x, b1)
-        x = tensor.soft_max(x)
-        loss = tensor.cross_entropy(x, target)
+        x = autograd.matmul(inputs, w0)
+        x = autograd.add_bias(x, b0)
+        x = autograd.relu(x)
+        x = autograd.matmul(x, w1)
+        x = autograd.add_bias(x, b1)
+        x = autograd.soft_max(x)
+        loss = autograd.cross_entropy(x, target)
         in_grads = autograd.backward(loss)
 
         for param in in_grads:
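
The loop above illustrates the general training step under the functional API:
compose autograd ops to build the graph, let autograd.backward(loss) walk the
creator chain, and hand each parameter gradient to the optimizer. A condensed,
hedged restatement (inputs, target, the w*/b* parameters and sgd are the
objects defined earlier in this example):

    # forward: every autograd.* call records its inputs and its creator op
    h = autograd.relu(autograd.add_bias(autograd.matmul(inputs, w0), b0))
    out = autograd.soft_max(autograd.add_bias(autograd.matmul(h, w1), b1))
    loss = autograd.cross_entropy(out, target)

    # backward: returns a mapping from parameter Tensor to its gradient
    in_grads = autograd.backward(loss)
    for param in in_grads:
        sgd.apply(0, in_grads[param], param, '')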

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 79410e1..bc717c7 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -65,33 +65,32 @@ if __name__ == '__main__':
     print ('the shape of testing label is', y_test.shape)
 
     # operations initialization
-    conv1=autograd.Conv2d(3,32)
-    relu1 = autograd.ReLU_Layer()  # same name for tensor.ReLU and layer_ops.ReLU
-    conv2=autograd.Conv2d(32,32)
-    relu2 = autograd.ReLU_Layer()
-    pooling = autograd.MaxPool2d()
-    flatten = autograd.Flatten()
-    linear = autograd.Linear(None, 10)  # in_feature=None for input_shape auto calculation
-    softmax = autograd.SoftMax()
-    cross_entropy = autograd.CrossEntropy()
+    conv1=autograd.Conv2d(3, 32)
+    conv2=autograd.Conv2d(32, 32)
+
+    w0 = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True)
+    w0.gaussian(0.0, 0.1)
+    b0 = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
+    b0.set_value(0.0)
 
 
     def forward(x,t):
-        y=conv1(x)[0]
-        y=relu1(y)[0]
-        y=conv2(y)[0]
-        y=relu2(y)[0]
-        y=pooling(y)[0]
-        y=flatten(y)[0]
-        y=linear(y)[0]
-        y=softmax(y)[0]
-        loss=cross_entropy(y, t)[0]
+        y=conv1(x)
+        y=autograd.relu(y)
+        y=conv2(y)
+        y=autograd.relu(y)
+        y=autograd.max_pool_2d(y)
+        y=autograd.flatten(y)
+        y=autograd.dense(y, w0, b0)
+        y=autograd.soft_max(y)
+        loss=autograd.cross_entropy(y, t)
         return loss, y
 
     for epoch in range(epochs):
         for i in range(16):
             inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
             targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+
             loss, y = forward(inputs, targets)
 
             accuracy_rate = accuracy(autograd.ctensor2numpy(y.data),autograd.ctensor2numpy(targets.data))

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index b55b7eb..35211de 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -10,11 +10,16 @@ from singa.proto import model_pb2
 from . import singa_wrap as singa
 
 import numpy as np
+import math
 
 CTensor = singa.Tensor
 
 
 class ReLU(Operation):
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        self.flag=flag
+        return self._do_forward(x)
 
     def forward(self, x):
         '''
@@ -24,7 +29,8 @@ class ReLU(Operation):
         Returns:
             a new CTensor whose element y = x if x >= 0; otherwise 0;
         '''
-        self.input = x
+        if self.flag:
+            self.input = x
         return singa.ReLU(x)
 
     def backward(self, dy):
@@ -45,6 +51,10 @@ def relu(x):
 
 class Matmul(Operation):
     '''For matrix multiplication'''
+    def __call__(self, x, w, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        self.flag=flag
+        return self._do_forward(x, w)
 
     def forward(self, x, w):
         '''Do forward propgation.
@@ -58,7 +68,8 @@ class Matmul(Operation):
         Returns:
             a CTensor for the result
         '''
-        self.input = (x, w)
+        if self.flag:
+            self.input = (x, w)
         return singa.Mult(x, w)
 
     def backward(self, dy):
@@ -243,9 +254,7 @@ class Conv2d(Operation):
         cudnn_prefer = 'fastest'
         workspace_byte_limit = 1024
         data_format = 'NCHW'
-        W_specs ={'init': 'gaussian',
-                  'mean':0.0,
-                  'std':0.1}
+        W_specs ={'init': 'xavier'}
         b_specs = {'init': 'constant'}
         input_sample_shape = None
 
@@ -266,7 +275,8 @@ class Conv2d(Operation):
             else:
                 inner_params[kwarg] = kwargs[kwarg]
                 
-
+        self.in_channels = in_channels
+        self.out_channels = out_channels
         self.W_specs=inner_params['W_specs']
         self.b_specs=inner_params['b_specs']
 
@@ -296,25 +306,26 @@ class Conv2d(Operation):
 
         param_data = self.PyLayer.layer.param_values()
         if not hasattr(self, 'w'):
-            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
+            self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'xavier':
+                std = math.sqrt(2.0/(self.in_channels+self.out_channels))
+                self.w.gaussian(0.0, std)
+            elif self.W_specs['init'] == 'gaussian':
                 if 'std' not in self.W_specs or 'mean' not in self.W_specs:
                     self.w.gaussian(0.0, 0.1)
                 else:
                     self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
             elif self.W_specs['init'] == 'uniform':
                 if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
+                    self.w.uniform(0.0, 0.1)
                 else:
                     self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass  # TODO
 
         xs = [x, self.w]
 
         if len(param_data) == 2:
             if not hasattr(self, 'b'):
-                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
                 if self.b_specs['init'] == 'gaussian':
                     if 'std' not in self.b_specs or 'mean' not in self.b_specs:
                         self.b.gaussian(0.0, 0.1)
@@ -322,18 +333,19 @@ class Conv2d(Operation):
                         self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
                 elif self.b_specs['init'] == 'uniform':
                     if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
+                        self.b.uniform(0.0, 0.1)
                     else:
                         self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass  # TODO
                 elif self.b_specs['init'] == 'constant':
                     self.b.set_value(0.0)
 
             xs.append(self.b)
 
         xs = tuple(xs)
-        return self._do_forward(*xs)
+        return self._do_forward_0(*xs)
+
+    def _do_forward_0(self, *xs):
+        return self._do_forward(*xs)[0]
 
     def forward(self, *xs):
         return self.PyLayer.layer.Forward(self.flag, xs[0])
@@ -351,7 +363,7 @@ class MaxPool2d(Operation):
         data_format = 'NCHW'
         input_sample_shape = None
 
-        allowed_kwargs = {'name': name,
+        inner_params = {'name': name,
                           'border_mode': border_mode,
                           'data_format': data_format,
                           'input_sample_shape': input_sample_shape
@@ -361,7 +373,7 @@ class MaxPool2d(Operation):
             if kwarg not in allowed_kwargs:
                 raise TypeError('Keyword argument not understood:', kwarg)
             else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
+                inner_params[kwarg] = kwargs[kwarg]
 
         if padding == 0:
             pad = None
@@ -371,9 +383,9 @@ class MaxPool2d(Operation):
         if dilation != 1 or return_indices is not False or ceil_mode is not False:
             raise ValueError('Not implemented yet')
 
-        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
-                                           kernel_size, stride, border_mode,
-                                           pad, data_format, input_sample_shape)
+        self.PyLayer = layer.Pooling2D(inner_params['name'], model_pb2.PoolingConf.MAX,
+                                           kernel_size, stride, inner_params['border_mode'],
+                                           pad, inner_params['data_format'], inner_params['input_sample_shape'])
 
     def __call__(self, x, flag=True):
         assert type(flag) is bool, 'flag can only be bool.'
@@ -393,8 +405,11 @@ class MaxPool2d(Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
+def max_pool_2d(x,kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+    return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
+
 
-class ReLU_Layer(Operation):
+'''class ReLU_Layer(Operation):
     def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
         self.PyLayer = layer.Activation(name, mode, input_sample_shape)
 
@@ -412,7 +427,7 @@ class ReLU_Layer(Operation):
         return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
+        return self.PyLayer.layer.Backward(0, dy)[0]'''
 
 
 class Flatten(Operation):
@@ -435,15 +450,26 @@ class Flatten(Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
+def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
+    return Flatten(name,axis,input_sample_shape)(x)[0]
 
-class Linear(Operation):
+def dense(x, w, b=None, bias=True, axis=0):
+    if bias:
+        if b is None:
+            raise ValueError('must input bias value.')
+        else:
+            y= matmul(x, w)
+            y= add_bias(y, b, axis)
+            return y
+    else:
+        return matmul(x, w)
+
+'''class Linear(Operation):
     def __init__(self, in_features, out_features, bias=True, **kwargs):
 
         name = 'Linear'
         W_transpose=False
-        W_specs = {'init': 'gaussian',
-                   'mean': 0.0,
-                   'std': 0.1}
+        W_specs = {'init': 'xavier'}
         b_specs = {'init': 'constant'}
         input_sample_shape = in_features
 
@@ -462,6 +488,8 @@ class Linear(Operation):
             else:
                 inner_params[kwarg] = kwargs[kwarg]
 
+        self.in_features = in_features
+        self.out_features = out_features
         self.W_specs = W_specs
         self.b_specs = b_specs
 
@@ -481,25 +509,26 @@ class Linear(Operation):
 
         param_data = self.PyLayer.layer.param_values()
         if not hasattr(self, 'w'):
-            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
+            self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'xavier':
+                std = math.sqrt(2.0/(self.in_channels+self.out_channels))
+                self.w.gaussian(0.0, std)
+            elif self.W_specs['init'] == 'gaussian':
                 if 'std' not in self.W_specs or 'mean' not in self.W_specs:
                     self.w.gaussian(0.0, 0.1)
                 else:
                     self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
             elif self.W_specs['init'] == 'uniform':
                 if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
+                    self.w.uniform(0.0, 0.1)
                 else:
                     self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass  # TODO
 
         xs = [x, self.w]
 
         if len(param_data) == 2:
             if not hasattr(self, 'b'):
-                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
                 if self.b_specs['init'] == 'gaussian':
                     if 'std' not in self.b_specs or 'mean' not in self.b_specs:
                         self.b.gaussian(0.0, 0.1)
@@ -507,11 +536,9 @@ class Linear(Operation):
                         self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
                 elif self.b_specs['init'] == 'uniform':
                     if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
+                        self.b.uniform(0.0, 0.1)
                     else:
                         self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass  # TODO
                 elif self.b_specs['init'] == 'constant':
                     self.b.set_value(0.0)
 
@@ -525,7 +552,7 @@ class Linear(Operation):
 
     def backward(self, dy):
         ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]
+        return (ret[0],)+ret[1]'''
 
 def infer_dependency(op):
     '''

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index ff43cc6..117779e 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -97,7 +97,7 @@ class Tensor(object):
             copy_from_numpy(self.data, data)
         elif isinstance(data, CTensor):
             self.data = data
-            assert data.device() == device, 'not the same device'
+            assert data.device == device, 'not the same device'
         else:
             self.data = CTensor(list(shape), device, dtype)
 


[13/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

A CNN example for the newly designed API.
It works well on my machine.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8146852c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8146852c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8146852c

Branch: refs/heads/master
Commit: 8146852c715555eb968b9e1414c6a8d82c22b7bd
Parents: f70f120
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Tue May 8 16:55:34 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist.py     | 116 ------------------------------------
 examples/autograd/mnist_cnn.py |  60 ++++++++-----------
 2 files changed, 24 insertions(+), 152 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8146852c/examples/autograd/mnist.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist.py b/examples/autograd/mnist.py
deleted file mode 100644
index e488bac..0000000
--- a/examples/autograd/mnist.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import numpy as np
-from singa import convolution_operation as layer_ops
-from singa import tensor
-from singa import autograd
-from singa import optimizer
-
-
-def load_data(path):
-    f = np.load(path)
-    x_train, y_train = f['x_train'], f['y_train']
-    x_test, y_test = f['x_test'], f['y_test']
-    f.close()
-    return (x_train, y_train), (x_test, y_test)
-
-def to_categorical(y, num_classes):
-    '''
-    Converts a class vector (integers) to binary class matrix.
-
-    Args
-        y: class vector to be converted into a matrix
-            (integers from 0 to num_classes).
-        num_classes: total number of classes.
-
-    Return
-        A binary matrix representation of the input.
-    '''
-    y = np.array(y, dtype='int')
-    n = y.shape[0]
-    categorical = np.zeros((n, num_classes))
-    categorical[np.arange(n), y] = 1
-    categorical=categorical.astype(np.float32)
-    return categorical
-
-def preprocess(data):
-    data=data.astype(np.float32)
-    data /= 255
-    data=np.expand_dims(data, axis=1)
-    return data
-
-def accuracy(pred,target):
-    y = np.argmax(pred, axis=1)
-    t = np.argmax(target, axis=1)
-    a = y == t
-    return np.array(a, 'int').sum() / float(len(t))
-
-
-if __name__ == '__main__':
-
-    batch_number=600
-    num_classes = 10
-    epochs = 1
-
-    sgd = optimizer.SGD(0.05)
-    #opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
-
-    train,test=load_data('/Users/wanqixue/Downloads/mnist.npz')
-    x_train=preprocess(train[0])
-    y_train = to_categorical(train[1], num_classes)
-
-    x_test=preprocess(test[0])
-    y_test=to_categorical(test[1],num_classes)
-    print 'the shape of training data is',x_train.shape
-    print 'the shape of training label is',y_train.shape
-    print 'the shape of testing data is', x_test.shape
-    print 'the shape of testing label is', y_test.shape
-
-
-    conv1=layer_ops.Convolution2D('conv1',32,3,1,border_mode='same')
-    conv2=layer_ops.Convolution2D('conv2',32,3,1,border_mode='same')
-
-    #operations can create when call
-    relu1=layer_ops.Activation('relu1')
-    relu2 = layer_ops.Activation('relu2')
-    pooling= layer_ops.MaxPooling2D('pooling',3,1,border_mode='same')
-    flatten=layer_ops.Flatten('flatten')
-    matmul=tensor.Matmul()
-    add_bias=tensor.AddBias()
-    softmax=tensor.SoftMax()
-    cross_entropy=tensor.CrossEntropy()
-    #avoid repeat create operations
-
-    w = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True) #package a dense layer to calculate the shape automatically
-    w.gaussian(0.0, 0.1)
-
-    b = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
-    b.set_value(0.0)
-
-    def forward(x,t):
-        y=conv1(x)[0]
-        y=relu1(y)[0]
-        y=conv2(y)[0]
-        y=relu2(y)[0]
-        y=pooling(y)[0]
-        y=flatten(y)[0]
-        y=matmul(y,w)[0]
-        y=add_bias(y,b)[0]
-        y=softmax(y)[0]
-        loss=cross_entropy(y,t)[0]
-        return loss, y
-
-    for epoch in range(epochs):
-        #for i in range(batch_number):
-        for i in range(50):
-            inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
-            targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
-            loss, y = forward(inputs,targets)
-
-            accuracy_rate = accuracy(tensor.ctensor2numpy(y.data),tensor.ctensor2numpy(targets.data))
-            if (i % 5 == 0):
-                print 'accuracy is:', accuracy_rate,'loss is:', tensor.ctensor2numpy(loss.data)[0]
-
-            in_grads = autograd.backward(loss)
-
-            for param in in_grads:
-                sgd.apply(0, in_grads[param], param, '')
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8146852c/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index e488bac..79410e1 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -1,10 +1,11 @@
 import numpy as np
-from singa import convolution_operation as layer_ops
+
 from singa import tensor
 from singa import autograd
 from singa import optimizer
 
 
+
 def load_data(path):
     f = np.load(path)
     x_train, y_train = f['x_train'], f['y_train']
@@ -51,7 +52,6 @@ if __name__ == '__main__':
     epochs = 1
 
     sgd = optimizer.SGD(0.05)
-    #opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
 
     train,test=load_data('/Users/wanqixue/Downloads/mnist.npz')
     x_train=preprocess(train[0])
@@ -59,31 +59,22 @@ if __name__ == '__main__':
 
     x_test=preprocess(test[0])
     y_test=to_categorical(test[1],num_classes)
-    print 'the shape of training data is',x_train.shape
-    print 'the shape of training label is',y_train.shape
-    print 'the shape of testing data is', x_test.shape
-    print 'the shape of testing label is', y_test.shape
-
-
-    conv1=layer_ops.Convolution2D('conv1',32,3,1,border_mode='same')
-    conv2=layer_ops.Convolution2D('conv2',32,3,1,border_mode='same')
+    print ('the shape of training data is',x_train.shape)
+    print ('the shape of training label is',y_train.shape)
+    print ('the shape of testing data is', x_test.shape)
+    print ('the shape of testing label is', y_test.shape)
+
+    # operations initialization
+    conv1=autograd.Conv2d(3,32)
+    relu1 = autograd.ReLU_Layer()  # same name for tensor.ReLU and layer_ops.ReLU
+    conv2=autograd.Conv2d(32,32)
+    relu2 = autograd.ReLU_Layer()
+    pooling = autograd.MaxPool2d()
+    flatten = autograd.Flatten()
+    linear = autograd.Linear(None, 10)  # in_feature=None for input_shape auto calculation
+    softmax = autograd.SoftMax()
+    cross_entropy = autograd.CrossEntropy()
 
-    #operations can create when call
-    relu1=layer_ops.Activation('relu1')
-    relu2 = layer_ops.Activation('relu2')
-    pooling= layer_ops.MaxPooling2D('pooling',3,1,border_mode='same')
-    flatten=layer_ops.Flatten('flatten')
-    matmul=tensor.Matmul()
-    add_bias=tensor.AddBias()
-    softmax=tensor.SoftMax()
-    cross_entropy=tensor.CrossEntropy()
-    #avoid repeat create operations
-
-    w = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True) #package a dense layer to calculate the shape automatically
-    w.gaussian(0.0, 0.1)
-
-    b = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
-    b.set_value(0.0)
 
     def forward(x,t):
         y=conv1(x)[0]
@@ -92,25 +83,22 @@ if __name__ == '__main__':
         y=relu2(y)[0]
         y=pooling(y)[0]
         y=flatten(y)[0]
-        y=matmul(y,w)[0]
-        y=add_bias(y,b)[0]
+        y=linear(y)[0]
         y=softmax(y)[0]
-        loss=cross_entropy(y,t)[0]
+        loss=cross_entropy(y, t)[0]
         return loss, y
 
     for epoch in range(epochs):
-        #for i in range(batch_number):
-        for i in range(50):
+        for i in range(16):
             inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
             targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
-            loss, y = forward(inputs,targets)
+            loss, y = forward(inputs, targets)
 
-            accuracy_rate = accuracy(tensor.ctensor2numpy(y.data),tensor.ctensor2numpy(targets.data))
+            accuracy_rate = accuracy(autograd.ctensor2numpy(y.data),autograd.ctensor2numpy(targets.data))
             if (i % 5 == 0):
-                print 'accuracy is:', accuracy_rate,'loss is:', tensor.ctensor2numpy(loss.data)[0]
+                print('accuracy is:', accuracy_rate,'loss is:', autograd.ctensor2numpy(loss.data)[0])
 
             in_grads = autograd.backward(loss)
 
             for param in in_grads:
-                sgd.apply(0, in_grads[param], param, '')
-
+                sgd.apply(0, in_grads[param], param, '')
\ No newline at end of file


[08/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Rewrite the Linear operation.

2. Avoid hard-coded absolute paths (the dataset path is now a command-line argument).

3. Update the mnist_cnn example.

4. Delete unnecessary code.
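
Alongside the Linear rewrite, the autograd.py diff below collapses Conv2d's
weight initialization into a single Gaussian whose std is derived from the
kernel fan-in plus fan-out. A worked sketch of that number for the
conv2 = Conv2d(32, 32) used in the example (kernel_size defaults to 3):

    import math

    in_channels, out_channels = 32, 32
    kernel_size = (3, 3)          # an int kernel_size is expanded to a tuple

    fan = in_channels * kernel_size[0] * kernel_size[1] + out_channels
    std = math.sqrt(2.0 / fan)    # sqrt(2 / 320) ~= 0.079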


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/40e609a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/40e609a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/40e609a4

Branch: refs/heads/master
Commit: 40e609a4e807595d335adaae17966daa8adac04c
Parents: ed464ef
Author: xuewanqi <xu...@u.nus.edu>
Authored: Tue May 15 14:58:41 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist_cnn.py |  60 +++++----
 python/singa/autograd.py       | 260 +++++++++++-------------------------
 2 files changed, 109 insertions(+), 211 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/40e609a4/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index bc717c7..7afbb9e 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -1,11 +1,12 @@
 import numpy as np
+import argparse
+import os
 
 from singa import tensor
 from singa import autograd
 from singa import optimizer
 
 
-
 def load_data(path):
     f = np.load(path)
     x_train, y_train = f['x_train'], f['y_train']
@@ -13,6 +14,7 @@ def load_data(path):
     f.close()
     return (x_train, y_train), (x_test, y_test)
 
+
 def to_categorical(y, num_classes):
     '''
     Converts a class vector (integers) to binary class matrix.
@@ -32,12 +34,14 @@ def to_categorical(y, num_classes):
     categorical=categorical.astype(np.float32)
     return categorical
 
+
 def preprocess(data):
     data=data.astype(np.float32)
     data /= 255
     data=np.expand_dims(data, axis=1)
     return data
 
+
 def accuracy(pred,target):
     y = np.argmax(pred, axis=1)
     t = np.argmax(target, axis=1)
@@ -47,47 +51,51 @@ def accuracy(pred,target):
 
 if __name__ == '__main__':
 
-    batch_number=600
+    parser = argparse.ArgumentParser(description='Train CNN over MNIST')
+    parser.add_argument('file_path', type=str, help='the dataset path')
+    args = parser.parse_args()
+
+    assert os.path.exists(args.file_path), 'Please download the MNIST dataset from ' \
+     'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
+
+    train, test = load_data(args.file_path)
+
+    batch_number = 600
     num_classes = 10
     epochs = 1
 
     sgd = optimizer.SGD(0.05)
 
-    train,test=load_data('/Users/wanqixue/Downloads/mnist.npz')
-    x_train=preprocess(train[0])
+    x_train = preprocess(train[0])
     y_train = to_categorical(train[1], num_classes)
 
     x_test=preprocess(test[0])
     y_test=to_categorical(test[1],num_classes)
-    print ('the shape of training data is',x_train.shape)
-    print ('the shape of training label is',y_train.shape)
+    print ('the shape of training data is', x_train.shape)
+    print ('the shape of training label is', y_train.shape)
     print ('the shape of testing data is', x_test.shape)
     print ('the shape of testing label is', y_test.shape)
 
     # operations initialization
-    conv1=autograd.Conv2d(3, 32)
-    conv2=autograd.Conv2d(32, 32)
-
-    w0 = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True)
-    w0.gaussian(0.0, 0.1)
-    b0 = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
-    b0.set_value(0.0)
-
-
-    def forward(x,t):
-        y=conv1(x)
-        y=autograd.relu(y)
-        y=conv2(y)
-        y=autograd.relu(y)
-        y=autograd.max_pool_2d(y)
-        y=autograd.flatten(y)
-        y=autograd.dense(y, w0, b0)
-        y=autograd.soft_max(y)
-        loss=autograd.cross_entropy(y, t)
+    conv1 = autograd.Conv2d(3, 32)
+    conv2 = autograd.Conv2d(32, 32)
+    linear = autograd.Linear(32*28*28, 10)
+
+
+    def forward(x, t):
+        y = conv1(x)
+        y = autograd.relu(y)
+        y = conv2(y)
+        y = autograd.relu(y)
+        y = autograd.max_pool_2d(y)
+        y = autograd.flatten(y)
+        y = linear(y)
+        y = autograd.soft_max(y)
+        loss = autograd.cross_entropy(y, t)
         return loss, y
 
     for epoch in range(epochs):
-        for i in range(16):
+        for i in range(batch_number):
             inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
             targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/40e609a4/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 35211de..daae43c 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -84,8 +84,8 @@ class Matmul(Operation):
             singa.Mult(self.input[0].T(), dy)
 
 
-def matmul(x, w):
-    return Matmul()(x, w)[0]
+def matmul(x, w, flag=True):
+    return Matmul()(x, w, flag)[0]
 
 
 class AddBias(Operation):
@@ -246,27 +246,19 @@ def ctensor2numpy(x):
     np_array = x.GetFloatValue(int(x.Size()))
     return np_array.reshape(x.shape())
 
+
 class Conv2d(Operation):
-    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs):
-
-        name = 'Conv2d'
-        border_mode = 'same'
-        cudnn_prefer = 'fastest'
-        workspace_byte_limit = 1024
-        data_format = 'NCHW'
-        W_specs ={'init': 'xavier'}
-        b_specs = {'init': 'constant'}
-        input_sample_shape = None
-
-        inner_params = {'name':name,
-                          'border_mode':border_mode,
-                          'cudnn_prefer':cudnn_prefer,
-                          'workspace_byte_limit':workspace_byte_limit,
-                          'data_format':data_format,
-                          'W_specs':W_specs,
-                          'b_specs':b_specs,
-                          'input_sample_shape':input_sample_shape
-                          }
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True,
+                 **kwargs):
+
+        inner_params = {'name': 'Conv2d',
+                          'border_mode': 'same',
+                          'cudnn_prefer': 'fastest',
+                          'workspace_byte_limit': 1024,
+                          'data_format': 'NCHW',
+                          'W_specs': {'init': 'xavier'},
+                          'b_specs': {'init': 'constant'},
+                          'input_sample_shape': None}
         # TODO valid value of inner_params check
 
         for kwarg in kwargs:
@@ -277,8 +269,13 @@ class Conv2d(Operation):
                 
         self.in_channels = in_channels
         self.out_channels = out_channels
-        self.W_specs=inner_params['W_specs']
-        self.b_specs=inner_params['b_specs']
+        self.W_specs = inner_params['W_specs']
+        self.b_specs = inner_params['b_specs']
+
+        if isinstance(kernel_size, int):
+            self.kernel_size = (kernel_size, kernel_size)
+        else:
+            self.kernel_size = kernel_size
 
         if padding == 0:
             pad = None
@@ -294,6 +291,12 @@ class Conv2d(Operation):
                  data_format=inner_params['data_format'], use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
                  pad=pad, input_sample_shape=inner_params['input_sample_shape'])
 
+    def get_params(self):
+        assert self.has_setup, \
+            'Must call setup() before get_params()'
+        params = self.PyLayer.layer.param_values()
+        return params
+
     def __call__(self, x, flag=True):
         assert type(flag) is bool, 'flag can only be bool.'
         if flag:
@@ -305,68 +308,74 @@ class Conv2d(Operation):
             self.PyLayer.setup(x.shape[1:])
 
         param_data = self.PyLayer.layer.param_values()
+
         if not hasattr(self, 'w'):
             self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'xavier':
-                std = math.sqrt(2.0/(self.in_channels+self.out_channels))
-                self.w.gaussian(0.0, std)
-            elif self.W_specs['init'] == 'gaussian':
-                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
-                    self.w.gaussian(0.0, 0.1)
-                else:
-                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
-            elif self.W_specs['init'] == 'uniform':
-                if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 0.1)
-                else:
-                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            std = math.sqrt(2.0/(self.in_channels*self.kernel_size[0]*self.kernel_size[1]+self.out_channels))
+            self.w.gaussian(0.0, std)
 
         xs = [x, self.w]
 
         if len(param_data) == 2:
             if not hasattr(self, 'b'):
                 self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
-                if self.b_specs['init'] == 'gaussian':
-                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
-                        self.b.gaussian(0.0, 0.1)
-                    else:
-                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
-                elif self.b_specs['init'] == 'uniform':
-                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 0.1)
-                    else:
-                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'constant':
-                    self.b.set_value(0.0)
+                self.b.set_value(0.0)
 
             xs.append(self.b)
 
         xs = tuple(xs)
-        return self._do_forward_0(*xs)
-
-    def _do_forward_0(self, *xs):
         return self._do_forward(*xs)[0]
 
     def forward(self, *xs):
         return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(0, dy)
+        ret = self.PyLayer.layer.Backward(self.flag, dy)
         return (ret[0],)+ret[1]
 
 
+class Linear(Operation):
+    def __init__(self, in_features, out_features, bias=True):
+        self.in_features = in_features
+        self.out_features = out_features
+        w_shape = (in_features, out_features)
+        self.w = Tensor(shape=w_shape, requires_grad=True, stores_grad=True)
+        if bias:
+            b_shape = (1, out_features)
+            self.b = Tensor(shape=b_shape, requires_grad=True, stores_grad=True)
+        self.init_value = False
+
+    def get_params(self):
+        if hasattr(self, 'b'):
+            return (self.w, self.b)
+        else:
+            return self.w
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        self.flag = flag
+        if self.init_value is False:
+            std = math.sqrt(2.0 / (self.in_features + self.out_features))
+            self.w.gaussian(0.0, std)
+            if hasattr(self, 'b'):
+                self.b.set_value(0.0)
+            self.init_value = True
+        return self._do_forward(x)
+
+    def _do_forward(self, x):
+        y = matmul(x, self.w, self.flag)
+        if hasattr(self, 'b'):
+            y = add_bias(y, self.b, axis=0)
+        return y
+
+
 class MaxPool2d(Operation):
     def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
 
-        name = 'MaxPool2d'
-        border_mode = 'same'
-        data_format = 'NCHW'
-        input_sample_shape = None
-
-        inner_params = {'name': name,
-                          'border_mode': border_mode,
-                          'data_format': data_format,
-                          'input_sample_shape': input_sample_shape
+        inner_params = {'name': 'MaxPool2d',
+                          'border_mode': 'same',
+                          'data_format': 'NCHW',
+                          'input_sample_shape': None
                           }
 
         for kwarg in kwargs:
@@ -405,29 +414,9 @@ class MaxPool2d(Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
-def max_pool_2d(x,kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
-    return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
-
 
-'''class ReLU_Layer(Operation):
-    def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
-        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]'''
+def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+    return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
 
 
 class Flatten(Operation):
@@ -450,109 +439,10 @@ class Flatten(Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
-def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
-    return Flatten(name,axis,input_sample_shape)(x)[0]
-
-def dense(x, w, b=None, bias=True, axis=0):
-    if bias:
-        if b is None:
-            raise ValueError('must input bias value.')
-        else:
-            y= matmul(x, w)
-            y= add_bias(y, b, axis)
-            return y
-    else:
-        return matmul(x, w)
-
-'''class Linear(Operation):
-    def __init__(self, in_features, out_features, bias=True, **kwargs):
-
-        name = 'Linear'
-        W_transpose=False
-        W_specs = {'init': 'xavier'}
-        b_specs = {'init': 'constant'}
-        input_sample_shape = in_features
-
-        inner_params = {'name': name,
-                          'W_transpose': W_transpose,
-                          'W_specs': W_specs,
-                          'b_specs': b_specs,
-                          'input_sample_shape': input_sample_shape
-                          }
 
-        # TODO valid value of inner_params check
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                inner_params[kwarg] = kwargs[kwarg]
-
-        self.in_features = in_features
-        self.out_features = out_features
-        self.W_specs = W_specs
-        self.b_specs = b_specs
-
-        self.PyLayer = layer.Dense(inner_params['name'], num_output=out_features, use_bias=bias,
-                     W_specs=self.W_specs, b_specs=self.b_specs,
-                     W_transpose=inner_params['W_transpose'], input_sample_shape=inner_params['input_sample_shape'])
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        param_data = self.PyLayer.layer.param_values()
-        if not hasattr(self, 'w'):
-            self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'xavier':
-                std = math.sqrt(2.0/(self.in_channels+self.out_channels))
-                self.w.gaussian(0.0, std)
-            elif self.W_specs['init'] == 'gaussian':
-                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
-                    self.w.gaussian(0.0, 0.1)
-                else:
-                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
-            elif self.W_specs['init'] == 'uniform':
-                if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 0.1)
-                else:
-                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-
-        xs = [x, self.w]
-
-        if len(param_data) == 2:
-            if not hasattr(self, 'b'):
-                self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
-                if self.b_specs['init'] == 'gaussian':
-                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
-                        self.b.gaussian(0.0, 0.1)
-                    else:
-                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
-                elif self.b_specs['init'] == 'uniform':
-                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 0.1)
-                    else:
-                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'constant':
-                    self.b.set_value(0.0)
-
-            xs.append(self.b)
-
-        xs = tuple(xs)
-        return self._do_forward(*xs)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
+def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
+    return Flatten(name, axis, input_sample_shape)(x)[0]
 
-    def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]'''
 
 def infer_dependency(op):
     '''


[07/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Fix bugs in the newly designed API.

2. Add flags to distinguish the training and evaluation phases (see the sketch below).

3. Add configurable initialization methods.
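
The flag in item 2 is a plain bool at the call site; inside each layer
operation it is mapped onto the model_pb2 phase constants before the
underlying C++ layer is invoked. A minimal sketch of that mapping, assuming a
layer op named conv1 and an input Tensor x:

    from singa.proto import model_pb2

    def to_phase(flag):
        # bool -> protobuf phase constant, as done inside the ops' __call__
        assert type(flag) is bool, 'flag can only be bool.'
        return model_pb2.kTrain if flag else model_pb2.kEval

    # y_train = conv1(x, flag=True)    # training phase
    # y_eval  = conv1(x, flag=False)   # evaluation phase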


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b136ac0a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b136ac0a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b136ac0a

Branch: refs/heads/master
Commit: b136ac0a8ce42fa6e5e123874c77039eaf86e556
Parents: 6402a53
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Mon May 7 15:31:29 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 python/singa/layer_ops.py | 228 +++++++++++++++++++++++++++++++----------
 1 file changed, 172 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b136ac0a/python/singa/layer_ops.py
----------------------------------------------------------------------
diff --git a/python/singa/layer_ops.py b/python/singa/layer_ops.py
index e5ef45f..dcbacf9 100644
--- a/python/singa/layer_ops.py
+++ b/python/singa/layer_ops.py
@@ -3,8 +3,8 @@ from singa import layer
 from singa.proto import model_pb2
 
 
-class Conv2D(tensor.Operation):
-    def __init__(self,in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True,**kwargs):
+class Conv2d(tensor.Operation):
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs):
 
         name='Conv2d'
         border_mode = 'same'
@@ -31,80 +31,148 @@ class Conv2D(tensor.Operation):
             else:
                 allowed_kwargs[kwarg] = kwargs[kwarg]
 
-        '''
-        How to match Keras:
+        self.W_specs=W_specs
+        self.b_specs=b_specs
+
+        if padding == 0:
+            pad = None
+        else:
+            pad = padding
+
+        if dilation != 1 or groups != 1:
+            raise ValueError('Not implemented yet')
 
-        in Keras conv2d, self.kernel record how to generate kernel (shape,initializer,name,regularizer,constraint),
-        it can be interpret to
-        shape -> kernel+input_sample_shape[0](nb_channels)+nb_kernels,
-        initializer, name, regularizer, constraint -> W_specs.
-        '''
         self.PyLayer = layer.Conv2D(name, nb_kernels=out_channels, kernel=kernel_size, stride=stride, border_mode=border_mode,
                  cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
-                 data_format=data_format, use_bias=bias, W_specs=W_specs, b_specs=b_specs,
-                 pad=padding, input_sample_shape=input_sample_shape)
+                 data_format=data_format, use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
+                 pad=pad, input_sample_shape=input_sample_shape)
 
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
 
-    def __call__(self, x):
         if not self.PyLayer.has_setup:
             self.PyLayer.setup(x.shape[1:])
+
         param_data = self.PyLayer.layer.param_values()
         if not hasattr(self, 'w'):
             self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            self.w.gaussian(0.0, 0.1)  # TODO realize other initialization method according to W_specs
-        
+            if self.W_specs['init'] == 'gaussian':
+                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
+                    self.w.gaussian(0.0, 0.1)
+                else:
+                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
+            elif self.W_specs['init'] == 'uniform':
+                if 'low' not in self.W_specs or 'high' not in self.W_specs:
+                    self.w.uniform(0.0, 1.0)
+                else:
+                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            elif self.W_specs['init'] == 'xavier':
+                pass  # TODO
+
         xs = [x, self.w]
 
         if len(param_data) == 2:
-            self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-            self.b.set_value(0.0)  # TODO realize other initialization method according to b_specs
+            if not hasattr(self, 'b'):
+                self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                if self.b_specs['init'] == 'gaussian':
+                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
+                        self.b.gaussian(0.0, 0.1)
+                    else:
+                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
+                elif self.b_specs['init'] == 'uniform':
+                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
+                        self.b.uniform(0.0, 1.0)
+                    else:
+                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
+                elif self.b_specs['init'] == 'xavier':
+                    pass  # TODO
+                else:
+                    self.b.set_value(0.0)
+
             xs.append(self.b)
 
         xs = tuple(xs)
         return self._do_forward(*xs)
 
-    def forward(self, flag=True,*xs):
-        if flag is True:
-            return self.PyLayer.layer.Forward(4, xs[0])
-        else:
-            return self.PyLayer.layer.Forward(8, xs[0])
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
         ret = self.PyLayer.layer.Backward(0, dy)
         return (ret[0],)+ret[1]
 
 
-class MaxPooling2D(tensor.Operation):
-    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', input_sample_shape=None):
+class MaxPool2d(tensor.Operation):
+    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+
+        name = 'MaxPool2d'
+        border_mode = 'same'
+        data_format = 'NCHW'
+        input_sample_shape = None
+
+        allowed_kwargs = {'name': name,
+                          'border_mode': border_mode,
+                          'data_format': data_format,
+                          'input_sample_shape': input_sample_shape
+                          }
+
+        for kwarg in kwargs:
+            if kwarg not in allowed_kwargs:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                allowed_kwargs[kwarg] = kwargs[kwarg]
+
+        if padding == 0:
+            pad = None
+        else:
+            pad = padding
+
+        if dilation != 1 or return_indices is not False or ceil_mode is not False:
+            raise ValueError('Not implemented yet')
 
         self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
-                                           kernel, stride, border_mode,
+                                           kernel_size, stride, border_mode,
                                            pad, data_format, input_sample_shape)
 
-    def __call__(self, x):
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
         if not self.PyLayer.has_setup:
             self.PyLayer.setup(x.shape[1:])
+
         return self._do_forward(x)
 
-    def forward(self, x):
-        return self.PyLayer.layer.Forward(4, x)
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
 
-class Activation(tensor.Operation):
-    def __init__(self,name, mode='relu',input_sample_shape=None):
+class ReLU(tensor.Operation):
+    def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
         self.PyLayer = layer.Activation(name, mode, input_sample_shape)
 
-    def __call__(self, x):
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
         if not self.PyLayer.has_setup:
             self.PyLayer.setup(x.shape[1:])
         return self._do_forward(x)
 
-    def forward(self, x):
-        return self.PyLayer.layer.Forward(4, x)
+    def forward(self, flag=True, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
@@ -114,58 +182,106 @@ class Flatten(tensor.Operation):
     def __init__(self, name, axis=1, input_sample_shape=None):
         self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
 
-    def __call__(self, x):
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
         if not self.PyLayer.has_setup:
             self.PyLayer.setup(x.shape[1:])
         return self._do_forward(x)
 
-    def forward(self, x):
-        return self.PyLayer.layer.Forward(4, x)
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]
 
 
-class Dense(tensor.Operation):
-    def __init__(self, name, num_output, use_bias=True,
-                     W_specs=None, b_specs=None,
-                     W_transpose=False, input_sample_shape=None):
+class Linear(tensor.Operation):
+    def __init__(self, in_features, out_features, bias=True, **kwargs):
+
+        name = 'Linear'
+        W_transpose=False
+        W_specs = None
+        b_specs = None
+        input_sample_shape = in_features
+
+        allowed_kwargs = {'name': name,
+                          'W_transpose': W_transpose,
+                          'W_specs': W_specs,
+                          'b_specs': b_specs,
+                          'input_sample_shape': input_sample_shape
+                          }
+
+        for kwarg in kwargs:
+            if kwarg not in allowed_kwargs:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                allowed_kwargs[kwarg] = kwargs[kwarg]
 
-        self.PyLayer = layer.Dense(name, num_output=num_output, use_bias=use_bias,
-                     W_specs=W_specs, b_specs=b_specs,
+        self.W_specs = W_specs
+        self.b_specs = b_specs
+
+        self.PyLayer = layer.Dense(name, num_output=out_features, use_bias=bias,
+                     W_specs=self.W_specs, b_specs=self.b_specs,
                      W_transpose=W_transpose, input_sample_shape=input_sample_shape)
 
-    def __call__(self, x):
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
         if not self.PyLayer.has_setup:
             self.PyLayer.setup(x.shape[1:])
 
         param_data = self.PyLayer.layer.param_values()
-
         if not hasattr(self, 'w'):
             self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            self.w.gaussian(0.0, 0.1)  # TODO realize other initialization method according to W_specs
+            if self.W_specs['init'] == 'gaussian':
+                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
+                    self.w.gaussian(0.0, 0.1)
+                else:
+                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
+            elif self.W_specs['init'] == 'uniform':
+                if 'low' not in self.W_specs or 'high' not in self.W_specs:
+                    self.w.uniform(0.0, 1.0)
+                else:
+                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            elif self.W_specs['init'] == 'xavier':
+                pass  # TODO
 
         xs = [x, self.w]
 
         if len(param_data) == 2:
-            self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-            self.b.set_value(0.0)  # TODO realize other initialization method according to b_specs
+            if not hasattr(self, 'b'):
+                self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                if self.b_specs['init'] == 'gaussian':
+                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
+                        self.b.gaussian(0.0, 0.1)
+                    else:
+                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
+                elif self.b_specs['init'] == 'uniform':
+                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
+                        self.b.uniform(0.0, 1.0)
+                    else:
+                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
+                elif self.b_specs['init'] == 'xavier':
+                    pass  # TODO
+                else:
+                    self.b.set_value(0.0)
+
             xs.append(self.b)
 
         xs = tuple(xs)
         return self._do_forward(*xs)
 
     def forward(self, *xs):
-        return self.PyLayer.layer.Forward(4, xs[0])
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
     def backward(self, dy):
         ret = self.PyLayer.layer.Backward(0, dy)
         return (ret[0],)+ret[1]
-
-
-
-
-
-
-
-


[11/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Changed the API of the Conv2d operation to PyTorch style; the next step is to confirm that the new design works. A usage sketch follows this list.

2. Added a training/evaluation flag to the Conv2d forward function.

3. Deleted the extra file.
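
For illustration, the intended call pattern after this change is roughly the sketch below. It is adapted from the test script deleted in this commit and is not a tested example; the W_specs/b_specs dictionaries and the trailing [0] indexing are assumptions taken from the surrounding diffs.

    from singa import tensor
    from singa import layer_ops

    # random NCHW input, shape borrowed from the removed test script
    x = tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)
    x.gaussian(1, 0)

    # PyTorch-style constructor: in_channels, out_channels, kernel_size
    conv = layer_ops.Conv2d(2, 4, kernel_size=3,
                            W_specs={'init': 'gaussian', 'mean': 0.0, 'std': 0.1},
                            b_specs={'init': 'constant'})

    y_train = conv(x, flag=True)[0]    # flag=True maps to model_pb2.kTrain
    y_eval = conv(x, flag=False)[0]    # flag=False maps to model_pb2.kEval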


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6402a53d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6402a53d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6402a53d

Branch: refs/heads/master
Commit: 6402a53d31185bb455c9d796b03d01f3dc476de3
Parents: 5abcc6e
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Sat May 5 17:00:22 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 python/singa/convolution_operation.py | 158 -----------------------------
 python/singa/layer_ops.py             |  59 ++++++-----
 2 files changed, 36 insertions(+), 181 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6402a53d/python/singa/convolution_operation.py
----------------------------------------------------------------------
diff --git a/python/singa/convolution_operation.py b/python/singa/convolution_operation.py
deleted file mode 100644
index 8475c21..0000000
--- a/python/singa/convolution_operation.py
+++ /dev/null
@@ -1,158 +0,0 @@
-from singa import tensor
-from singa import layer
-from singa.proto import model_pb2
-from singa import autograd
-
-
-
-def ctensor2numpy(x):
-    '''
-    // For test use.
-
-
-    To be used in SoftMax Operation.
-    Convert a singa_tensor to numpy_tensor.
-    '''
-    np_array = x.GetFloatValue(int(x.Size()))
-    return np_array.reshape(x.shape())
-
-class Convolution2D(tensor.Operation):
-    def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
-                 cudnn_prefer='fastest', workspace_byte_limit=1024,
-                 data_format='NCHW', use_bias=True, W_specs=None, b_specs=None,
-                 pad=None,input_sample_shape=None):
-        '''
-        How to match Keras:
-
-        in Keras conv2d, self.kernel record how to generate kernel (shape,initializer,name,regularizer,constraint),
-        it can be interpret to
-        shape -> kernel+input_sample_shape[0](nb_channels)+nb_kernels,
-        initializer, name, regularizer, constraint -> W_specs.
-        '''
-        self.PyLayer = layer.Conv2D(name, nb_kernels, kernel=kernel, stride=stride, border_mode=border_mode,
-                 cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
-                 data_format=data_format, use_bias=use_bias, W_specs=W_specs, b_specs=b_specs,
-                 pad=pad, input_sample_shape=input_sample_shape)
-
-
-    def __call__(self, x):
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        param_data = self.PyLayer.layer.param_values()
-
-        if not hasattr(self, 'w'):
-            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            self.w.gaussian(0.0, 0.1)  # TODO realize other initialization method according to W_specs
-
-        xs = [x, self.w]
-
-        if len(param_data) == 2:
-            self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-            self.b.set_value(0.0)  # TODO realize other initialization method according to b_specs
-            xs.append(self.b)
-
-        xs = tuple(xs)
-        return self._do_forward(*xs)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(4, xs[0])  #how ktrain works?  flag & ktrain.
-
-    def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(True, dy)
-        return (ret[0],)+ret[1]
-
-
-class MaxPooling2D(tensor.Operation):
-    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', input_sample_shape=None):
-
-        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
-                                           kernel, stride, border_mode,
-                                           pad, data_format, input_sample_shape)
-
-    def __call__(self, x):
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, x):
-        return self.PyLayer.layer.Forward(4, x)
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(True, dy)[0]   # how backward() return?
-
-
-class Activation(tensor.Operation):
-    def __init__(self,name, mode='relu',input_sample_shape=None):
-        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
-
-    def __call__(self, x):
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, x):
-        return self.PyLayer.layer.Forward(4, x)
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(True, dy)[0]
-
-
-class Flatten(tensor.Operation):
-    def __init__(self, name, axis=1, input_sample_shape=None):
-        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
-
-    def __call__(self, x):
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, x):
-        return self.PyLayer.layer.Forward(4, x)
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(True, dy)[0]
-
-
-class Dense(tensor.Operation):
-    '''
-    Need to implemented?
-    '''
-    pass
-
-
-inputs=tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)
-inputs.gaussian(1,0)
-
-x = Convolution2D('conv',4)(inputs)[0]
-print(x.shape)
-
-x = MaxPooling2D('pooling')(x)[0]
-print(x.shape)
-
-x = Activation('relu')(x)[0]
-print(x.shape)
-
-x = Flatten('flatten')(x)[0]
-print(x.shape)
-
-w0 = tensor.Tensor(shape=(4, 10), requires_grad=True, stores_grad=True)
-w0.gaussian(0.0, 0.1)
-x = tensor.matmul(x, w0)
-print(x.shape)
-
-x = tensor.softmax(x)
-
-target=tensor.Tensor(shape=(10, 10), requires_grad=False, stores_grad=False)
-target.gaussian(0.0 ,0.1)
-loss = tensor.cross_entropy(x, target)
-
-grad=autograd.backward(loss)
-print(grad)
-
-
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6402a53d/python/singa/layer_ops.py
----------------------------------------------------------------------
diff --git a/python/singa/layer_ops.py b/python/singa/layer_ops.py
index 1ca888f..e5ef45f 100644
--- a/python/singa/layer_ops.py
+++ b/python/singa/layer_ops.py
@@ -1,26 +1,36 @@
 from singa import tensor
 from singa import layer
 from singa.proto import model_pb2
-from singa import autograd
 
 
+class Conv2D(tensor.Operation):
+    def __init__(self,in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True,**kwargs):
+
+        name='Conv2d'
+        border_mode = 'same'
+        cudnn_prefer = 'fastest'
+        workspace_byte_limit = 1024
+        data_format = 'NCHW'
+        W_specs = None
+        b_specs = None
+        input_sample_shape=None
+
+        allowed_kwargs = {'name':name,
+                          'border_mode':border_mode,
+                          'cudnn_prefer':cudnn_prefer,
+                          'workspace_byte_limit':workspace_byte_limit,
+                          'data_format':data_format,
+                          'W_specs':W_specs,
+                          'b_specs':b_specs,
+                          'input_sample_shape':input_sample_shape
+                          }
+
+        for kwarg in kwargs:
+            if kwarg not in allowed_kwargs:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                allowed_kwargs[kwarg] = kwargs[kwarg]
 
-def ctensor2numpy(x):
-    '''
-    // For test use.
-
-
-    To be used in SoftMax Operation.
-    Convert a singa_tensor to numpy_tensor.
-    '''
-    np_array = x.GetFloatValue(int(x.Size()))
-    return np_array.reshape(x.shape())
-
-class Convolution2D(tensor.Operation):
-    def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
-                 cudnn_prefer='fastest', workspace_byte_limit=1024,
-                 data_format='NCHW', use_bias=True, W_specs=None, b_specs=None,
-                 pad=None,input_sample_shape=None):
         '''
         How to match Keras:
 
@@ -29,10 +39,10 @@ class Convolution2D(tensor.Operation):
         shape -> kernel+input_sample_shape[0](nb_channels)+nb_kernels,
         initializer, name, regularizer, constraint -> W_specs.
         '''
-        self.PyLayer = layer.Conv2D(name, nb_kernels, kernel=kernel, stride=stride, border_mode=border_mode,
+        self.PyLayer = layer.Conv2D(name, nb_kernels=out_channels, kernel=kernel_size, stride=stride, border_mode=border_mode,
                  cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
-                 data_format=data_format, use_bias=use_bias, W_specs=W_specs, b_specs=b_specs,
-                 pad=pad, input_sample_shape=input_sample_shape)
+                 data_format=data_format, use_bias=bias, W_specs=W_specs, b_specs=b_specs,
+                 pad=padding, input_sample_shape=input_sample_shape)
 
 
     def __call__(self, x):
@@ -53,8 +63,11 @@ class Convolution2D(tensor.Operation):
         xs = tuple(xs)
         return self._do_forward(*xs)
 
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(4, xs[0])  #how ktrain works?  flag & ktrain.
+    def forward(self, flag=True,*xs):
+        if flag is True:
+            return self.PyLayer.layer.Forward(4, xs[0])
+        else:
+            return self.PyLayer.layer.Forward(8, xs[0])
 
     def backward(self, dy):
         ret = self.PyLayer.layer.Backward(0, dy)
@@ -78,7 +91,7 @@ class MaxPooling2D(tensor.Operation):
         return self.PyLayer.layer.Forward(4, x)
 
     def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]   # how backward() return?
+        return self.PyLayer.layer.Backward(0, dy)[0]
 
 
 class Activation(tensor.Operation):


[10/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

Posted by mo...@apache.org.
SINGA-349 Create layer operations for autograd

1. Modified the newly designed API and fixed bugs.
   It works on my machine.

2. Relocated some code (a usage sketch follows this list):
   - Integrated the operations into autograd.py
   - Deleted extra files
   - Deleted the operations from tensor.py
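
With the operations now living in autograd.py, a small forward/backward pass looks roughly like the sketch below. It is adapted from the test script removed earlier and is untested; the Linear in_features value (4) simply mirrors the w0 shape used in that script, so the exact shapes may need adjustment.

    from singa import tensor
    from singa import autograd

    # random input and target, shapes borrowed from the removed test script
    x = tensor.Tensor(shape=(10, 2, 3, 3), requires_grad=False, stores_grad=False)
    x.gaussian(1, 0)
    target = tensor.Tensor(shape=(10, 10), requires_grad=False, stores_grad=False)
    target.gaussian(0.0, 0.1)

    y = autograd.Conv2d(2, 4, kernel_size=3)(x, flag=True)[0]
    y = autograd.MaxPool2d(kernel_size=3, stride=2)(y, flag=True)[0]
    y = autograd.relu(y)
    y = autograd.Flatten()(y, flag=True)[0]
    y = autograd.Linear(4, 10)(y, flag=True)[0]
    y = autograd.soft_max(y)
    loss = autograd.cross_entropy(y, target)

    grads = autograd.backward(loss)   # assumed: backward() already existed in autograd.py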


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f70f1201
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f70f1201
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f70f1201

Branch: refs/heads/master
Commit: f70f120119744015b6f9f670ff4596fd2fc920b7
Parents: b136ac0
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Tue May 8 15:33:34 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 python/singa/autograd.py  | 527 ++++++++++++++++++++++++++++++++++++++++-
 python/singa/layer_ops.py | 287 ----------------------
 python/singa/tensor.py    | 224 +-----------------
 3 files changed, 527 insertions(+), 511 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f70f1201/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 175f8b2..b55b7eb 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -1,6 +1,531 @@
+
+from __future__ import division
+
+from functools import reduce
 from collections import Counter, deque
-from .tensor import Tensor, Dummy
+from .tensor import Tensor, Dummy, Operation
+
+from singa import layer
+from singa.proto import model_pb2
+from . import singa_wrap as singa
+
+import numpy as np
+
+CTensor = singa.Tensor
+
+
+class ReLU(Operation):
+
+    def forward(self, x):
+        '''
+        Args:
+            x(CTensor): input tensor
+
+        Returns:
+            a new CTensor whose element y = x if x >= 0; otherwise 0;
+        '''
+        self.input = x
+        return singa.ReLU(x)
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy(CTensor): dL / dy
+
+        Returns:
+            dx(CTensor): dL / dx = dy if x >= 0; otherwise 0;
+        '''
+        dx = singa.GTFloat(self.input, 0.0)
+        return singa.__mul__(dy, dx)
+
+
+def relu(x):
+    return ReLU()(x)[0]
+
+
+class Matmul(Operation):
+    '''For matrix multiplication'''
+
+    def forward(self, x, w):
+        '''Do forward propagation.
+
+        Store x (or w) if w (or x) requires gradient.
+
+        Args:
+            x (CTensor): matrix
+            w (CTensor): matrix
+
+        Returns:
+            a CTensor for the result
+        '''
+        self.input = (x, w)
+        return singa.Mult(x, w)
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss
+
+        Returns:
+            a tuple for (dx, dw)
+        '''
+        return singa.Mult(dy, self.input[1].T()), \
+            singa.Mult(self.input[0].T(), dy)
+
+
+def matmul(x, w):
+    return Matmul()(x, w)[0]
+
+
+class AddBias(Operation):
+    '''
+    Add Bias to each row / column of the Tensor, depending on the parameter axis.
+    '''
+
+    def __init__(self, axis=0):
+        '''
+        To indicate the calculation axis, 0 for row, 1 for column.
+
+        Args:
+            axis: 0 or 1, default is 0.
+        '''
+        self.axis = axis
+
+    def forward(self, x, b):
+        '''
+        Args:
+            x: matrix.
+            b: bias to be added.
+
+        Return:
+            the result Tensor
+        '''
+        if self.axis == 0:
+            singa.AddRow(b, x)
+        elif self.axis == 1:
+            singa.AddColumn(b, x)
+        return x
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss.
+
+        Return:
+            a tuple for (db, dx), db is data for dL / db, dx is data
+            for dL / dx.
+        '''
+        if self.axis == 0:
+            return dy, singa.Sum(dy, 0)
+        elif self.axis == 1:
+            return dy, singa.Sum(dy, 0)
+
+
+def add_bias(x, b, axis=0):
+    return AddBias(axis)(x, b)[0]
+
+
+class SoftMax(Operation):
+    '''
+    Apply SoftMax for each row of the Tensor or each column of the Tensor
+    according to the parameter axis.
+    '''
+
+    def __init__(self, axis=0):
+        self.axis = axis
+
+    def forward(self, x):
+        '''
+        Args:
+            x (CTensor): the input 1d or 2d tensor
+
+        Returns:
+            the result Tensor
+        '''
+        if self.axis == 1:
+            x = x.T()
+        self.output = singa.SoftMax(x)
+        if self.axis == 0:
+            return self.output
+        elif self.axis == 1:
+            return self.output.T()
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss
+
+        Returns:
+            dx (CTensor): data for the dL / dx, L is the loss,
+            x is the input of the current Operation
+        '''
+        # calculations are made on numpy array
+        if self.axis == 1:
+            dy = dy.T()
+        grad = ctensor2numpy(dy)
+        output = ctensor2numpy(self.output)
+        out_1 = np.einsum('ki,ki->ki', grad, output)
+        medium_out = np.einsum('ki,kj->kij', output, output)
+        out_2 = np.einsum('kij,kj->ki', medium_out, grad)
+        out = out_1 - out_2
+        dx = CTensor(out_1.shape)
+        dx.CopyFloatDataFromHostPtr(out.flatten())
+        if self.axis == 0:
+            return dx
+        elif self.axis == 1:
+            return dx.T()
+
+
+def soft_max(x, axis=0):
+    return SoftMax(axis)(x)[0]
+
+
+class CrossEntropy(Operation):
+    '''
+    Calculate the CrossEntropy loss for a batch of training data.
+
+    '''
+
+    def forward(self, x, t):
+        '''
+        Args:
+            x (CTensor): 1d or 2d tensor, the prediction data(output) of current network.
+            t (CTensor): 1d or 2d tensor, the target data for training.
+
+        Returns:
+            loss (CTensor): scalar.
+        '''
+        loss = CTensor((1,))
+        loss_data = -singa.SumAsFloat(singa.__mul__(t, singa.Log(x)))
+        loss.SetFloatValue(loss_data / x.shape()[0])
+        self.x = x
+        self.t = t
+        self.input = (x, t)
+        return loss
+
+    def backward(self, dy=1.0):
+        '''
+        Args:
+            dy (float or CTensor): scalar, accumulate gradient from outside of current network, usually
+            equal to 1.0
+
+        Returns:
+            dx (CTensor): data for the dL /dx, L is the loss, x is the output of current network.
+            note that this is true for dy = 1.0
+        '''
+        dx = singa.__div__(self.t, self.x)
+        dx *= float(-1 / self.x.shape()[0])
+        if isinstance(dy, float):
+            # dtype of dy: float
+            dx *= dy
+            return dx, None
+        elif isinstance(dy, CTensor):
+            pass  # TODO: broadcast elementwise multiply does not seem to be supported
+
+
+def cross_entropy(y, t):
+    return CrossEntropy()(y, t)[0]
+
+
+def ctensor2numpy(x):
+    '''
+    To be used in SoftMax Operation.
+    Convert a singa_tensor to numpy_tensor.
+    '''
+    np_array = x.GetFloatValue(int(x.Size()))
+    return np_array.reshape(x.shape())
+
+class Conv2d(Operation):
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs):
+
+        name = 'Conv2d'
+        border_mode = 'same'
+        cudnn_prefer = 'fastest'
+        workspace_byte_limit = 1024
+        data_format = 'NCHW'
+        W_specs ={'init': 'gaussian',
+                  'mean':0.0,
+                  'std':0.1}
+        b_specs = {'init': 'constant'}
+        input_sample_shape = None
+
+        inner_params = {'name':name,
+                          'border_mode':border_mode,
+                          'cudnn_prefer':cudnn_prefer,
+                          'workspace_byte_limit':workspace_byte_limit,
+                          'data_format':data_format,
+                          'W_specs':W_specs,
+                          'b_specs':b_specs,
+                          'input_sample_shape':input_sample_shape
+                          }
+        # TODO: validate the values supplied for inner_params
+
+        for kwarg in kwargs:
+            if kwarg not in inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                inner_params[kwarg] = kwargs[kwarg]
+                
+
+        self.W_specs=inner_params['W_specs']
+        self.b_specs=inner_params['b_specs']
+
+        if padding == 0:
+            pad = None
+        else:
+            pad = padding
+
+        if dilation != 1 or groups != 1:
+            raise ValueError('Not implemented yet')
+
+        self.PyLayer = layer.Conv2D(inner_params['name'], nb_kernels=out_channels, kernel=kernel_size, stride=stride,
+                                    border_mode=inner_params['border_mode'],
+                 cudnn_prefer=inner_params['cudnn_prefer'], workspace_byte_limit=inner_params['workspace_byte_limit'],
+                 data_format=inner_params['data_format'], use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
+                 pad=pad, input_sample_shape=inner_params['input_sample_shape'])
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        param_data = self.PyLayer.layer.param_values()
+        if not hasattr(self, 'w'):
+            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'gaussian':
+                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
+                    self.w.gaussian(0.0, 0.1)
+                else:
+                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
+            elif self.W_specs['init'] == 'uniform':
+                if 'low' not in self.W_specs or 'high' not in self.W_specs:
+                    self.w.uniform(0.0, 1.0)
+                else:
+                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            elif self.W_specs['init'] == 'xavier':
+                pass  # TODO
+
+        xs = [x, self.w]
+
+        if len(param_data) == 2:
+            if not hasattr(self, 'b'):
+                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                if self.b_specs['init'] == 'gaussian':
+                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
+                        self.b.gaussian(0.0, 0.1)
+                    else:
+                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
+                elif self.b_specs['init'] == 'uniform':
+                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
+                        self.b.uniform(0.0, 1.0)
+                    else:
+                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
+                elif self.b_specs['init'] == 'xavier':
+                    pass  # TODO
+                elif self.b_specs['init'] == 'constant':
+                    self.b.set_value(0.0)
+
+            xs.append(self.b)
+
+        xs = tuple(xs)
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(0, dy)
+        return (ret[0],)+ret[1]
+
+
+class MaxPool2d(Operation):
+    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+
+        name = 'MaxPool2d'
+        border_mode = 'same'
+        data_format = 'NCHW'
+        input_sample_shape = None
+
+        allowed_kwargs = {'name': name,
+                          'border_mode': border_mode,
+                          'data_format': data_format,
+                          'input_sample_shape': input_sample_shape
+                          }
+
+        for kwarg in kwargs:
+            if kwarg not in allowed_kwargs:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                allowed_kwargs[kwarg] = kwargs[kwarg]
+
+        if padding == 0:
+            pad = None
+        else:
+            pad = padding
+
+        if dilation != 1 or return_indices is not False or ceil_mode is not False:
+            raise ValueError('Not implemented yet')
+
+        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
+                                           kernel_size, stride, border_mode,
+                                           pad, data_format, input_sample_shape)
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        return self._do_forward(x)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class ReLU_Layer(Operation):
+    def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
+        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class Flatten(Operation):
+    def __init__(self, name='Flatten', axis=1, input_sample_shape=None):
+        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class Linear(Operation):
+    def __init__(self, in_features, out_features, bias=True, **kwargs):
+
+        name = 'Linear'
+        W_transpose=False
+        W_specs = {'init': 'gaussian',
+                   'mean': 0.0,
+                   'std': 0.1}
+        b_specs = {'init': 'constant'}
+        input_sample_shape = in_features
+
+        inner_params = {'name': name,
+                          'W_transpose': W_transpose,
+                          'W_specs': W_specs,
+                          'b_specs': b_specs,
+                          'input_sample_shape': input_sample_shape
+                          }
+
+        # TODO: validate the values supplied for inner_params
+
+        for kwarg in kwargs:
+            if kwarg not in inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                inner_params[kwarg] = kwargs[kwarg]
+
+        self.W_specs = inner_params['W_specs']
+        self.b_specs = inner_params['b_specs']
+
+        self.PyLayer = layer.Dense(inner_params['name'], num_output=out_features, use_bias=bias,
+                     W_specs=self.W_specs, b_specs=self.b_specs,
+                     W_transpose=inner_params['W_transpose'], input_sample_shape=inner_params['input_sample_shape'])
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        param_data = self.PyLayer.layer.param_values()
+        if not hasattr(self, 'w'):
+            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'gaussian':
+                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
+                    self.w.gaussian(0.0, 0.1)
+                else:
+                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
+            elif self.W_specs['init'] == 'uniform':
+                if 'low' not in self.W_specs or 'high' not in self.W_specs:
+                    self.w.uniform(0.0, 1.0)
+                else:
+                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            elif self.W_specs['init'] == 'xavier':
+                pass  # TODO
+
+        xs = [x, self.w]
+
+        if len(param_data) == 2:
+            if not hasattr(self, 'b'):
+                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                if self.b_specs['init'] == 'gaussian':
+                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
+                        self.b.gaussian(0.0, 0.1)
+                    else:
+                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
+                elif self.b_specs['init'] == 'uniform':
+                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
+                        self.b.uniform(0.0, 1.0)
+                    else:
+                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
+                elif self.b_specs['init'] == 'xavier':
+                    pass  # TODO
+                elif self.b_specs['init'] == 'constant':
+                    self.b.set_value(0.0)
+
+            xs.append(self.b)
+
+        xs = tuple(xs)
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(0, dy)
+        return (ret[0],)+ret[1]
 
 def infer_dependency(op):
     '''

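The diff above also carries the elementary operations (ReLU, Matmul, AddBias, SoftMax, CrossEntropy and their functional wrappers) over from tensor.py into autograd.py. A rough, untested sketch of composing a dense layer by hand with these wrappers, assuming backward() is exposed by the same module:

    from singa import tensor
    from singa import autograd

    x = tensor.Tensor(shape=(3, 5), requires_grad=False, stores_grad=False)
    x.gaussian(0.0, 0.1)
    w = tensor.Tensor(shape=(5, 4), requires_grad=True, stores_grad=True)
    w.gaussian(0.0, 0.1)
    b = tensor.Tensor(shape=(4,), requires_grad=True, stores_grad=True)
    b.set_value(0.0)
    t = tensor.Tensor(shape=(3, 4), requires_grad=False, stores_grad=False)
    t.gaussian(0.0, 0.1)

    y = autograd.matmul(x, w)         # Matmul operation
    y = autograd.add_bias(y, b)       # AddBias, axis=0 adds b to every row
    y = autograd.soft_max(y)          # row-wise SoftMax
    loss = autograd.cross_entropy(y, t)
    grads = autograd.backward(loss)   # assumed to be defined already in autograd.py
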
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f70f1201/python/singa/layer_ops.py
----------------------------------------------------------------------
diff --git a/python/singa/layer_ops.py b/python/singa/layer_ops.py
deleted file mode 100644
index dcbacf9..0000000
--- a/python/singa/layer_ops.py
+++ /dev/null
@@ -1,287 +0,0 @@
-from singa import tensor
-from singa import layer
-from singa.proto import model_pb2
-
-
-class Conv2d(tensor.Operation):
-    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs):
-
-        name='Conv2d'
-        border_mode = 'same'
-        cudnn_prefer = 'fastest'
-        workspace_byte_limit = 1024
-        data_format = 'NCHW'
-        W_specs = None
-        b_specs = None
-        input_sample_shape=None
-
-        allowed_kwargs = {'name':name,
-                          'border_mode':border_mode,
-                          'cudnn_prefer':cudnn_prefer,
-                          'workspace_byte_limit':workspace_byte_limit,
-                          'data_format':data_format,
-                          'W_specs':W_specs,
-                          'b_specs':b_specs,
-                          'input_sample_shape':input_sample_shape
-                          }
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
-
-        self.W_specs=W_specs
-        self.b_specs=b_specs
-
-        if padding == 0:
-            pad = None
-        else:
-            pad = padding
-
-        if dilation != 1 or groups != 1:
-            raise ValueError('Not implemented yet')
-
-        self.PyLayer = layer.Conv2D(name, nb_kernels=out_channels, kernel=kernel_size, stride=stride, border_mode=border_mode,
-                 cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
-                 data_format=data_format, use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
-                 pad=pad, input_sample_shape=input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        param_data = self.PyLayer.layer.param_values()
-        if not hasattr(self, 'w'):
-            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
-                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
-                    self.w.gaussian(0.0, 0.1)
-                else:
-                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
-            elif self.W_specs['init'] == 'uniform':
-                if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
-                else:
-                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass  # TODO
-
-        xs = [x, self.w]
-
-        if len(param_data) == 2:
-            if not hasattr(self, 'b'):
-                self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-                if self.b_specs['init'] == 'gaussian':
-                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
-                        self.b.gaussian(0.0, 0.1)
-                    else:
-                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
-                elif self.b_specs['init'] == 'uniform':
-                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
-                    else:
-                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass  # TODO
-                else:
-                    self.b.set_value(0.0)
-
-            xs.append(self.b)
-
-        xs = tuple(xs)
-        return self._do_forward(*xs)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]
-
-
-class MaxPool2d(tensor.Operation):
-    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
-
-        name = 'MaxPool2d'
-        border_mode = 'same'
-        data_format = 'NCHW'
-        input_sample_shape = None
-
-        allowed_kwargs = {'name': name,
-                          'border_mode': border_mode,
-                          'data_format': data_format,
-                          'input_sample_shape': input_sample_shape
-                          }
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
-
-        if padding == 0:
-            pad = None
-        else:
-            pad = padding
-
-        if dilation != 1 or return_indices is not False or ceil_mode is not False:
-            raise ValueError('Not implemented yet')
-
-        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
-                                           kernel_size, stride, border_mode,
-                                           pad, data_format, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        return self._do_forward(x)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
-
-
-class ReLU(tensor.Operation):
-    def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
-        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, flag=True, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
-
-
-class Flatten(tensor.Operation):
-    def __init__(self, name, axis=1, input_sample_shape=None):
-        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
-
-
-class Linear(tensor.Operation):
-    def __init__(self, in_features, out_features, bias=True, **kwargs):
-
-        name = 'Linear'
-        W_transpose=False
-        W_specs = None
-        b_specs = None
-        input_sample_shape = in_features
-
-        allowed_kwargs = {'name': name,
-                          'W_transpose': W_transpose,
-                          'W_specs': W_specs,
-                          'b_specs': b_specs,
-                          'input_sample_shape': input_sample_shape
-                          }
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
-
-        self.W_specs = W_specs
-        self.b_specs = b_specs
-
-        self.PyLayer = layer.Dense(name, num_output=out_features, use_bias=bias,
-                     W_specs=self.W_specs, b_specs=self.b_specs,
-                     W_transpose=W_transpose, input_sample_shape=input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        param_data = self.PyLayer.layer.param_values()
-        if not hasattr(self, 'w'):
-            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
-                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
-                    self.w.gaussian(0.0, 0.1)
-                else:
-                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
-            elif self.W_specs['init'] == 'uniform':
-                if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
-                else:
-                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass  # TODO
-
-        xs = [x, self.w]
-
-        if len(param_data) == 2:
-            if not hasattr(self, 'b'):
-                self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-                if self.b_specs['init'] == 'gaussian':
-                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
-                        self.b.gaussian(0.0, 0.1)
-                    else:
-                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
-                elif self.b_specs['init'] == 'uniform':
-                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
-                    else:
-                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass  # TODO
-                else:
-                    self.b.set_value(0.0)
-
-            xs.append(self.b)
-
-        xs = tuple(xs)
-        return self._do_forward(*xs)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f70f1201/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index fcf00b8..ff43cc6 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -97,7 +97,7 @@ class Tensor(object):
             copy_from_numpy(self.data, data)
         elif isinstance(data, CTensor):
             self.data = data
-            assert data.device == device, 'not the same device'
+            assert data.device() == device, 'not the same device'
         else:
             self.data = CTensor(list(shape), device, dtype)
 
@@ -1356,225 +1356,3 @@ class Dummy(Operation):
         self.src = []
         self.y_id2idx = {id(tensor): 0}
         self.requires_grad = False
-
-
-class ReLU(Operation):
-
-    def forward(self, x):
-        '''
-        Args:
-            x(CTensor): input tensor
-
-        Returns:
-            a new CTensor whose element y = x if x >= 0; otherwise 0;
-        '''
-        self.input = x
-        return singa.ReLU(x)
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy(CTensor): dL / dy
-
-        Returns:
-            dx(CTensor): dL / dx = dy if x >= 0; otherwise 0;
-        '''
-        dx = singa.GTFloat(self.input, 0.0)
-        return singa.__mul__(dy, dx)
-
-
-def relu(x):
-    return ReLU()(x)[0]
-
-
-class Matmul(Operation):
-    '''For matrix multiplication'''
-
-    def forward(self, x, w):
-        '''Do forward propgation.
-
-        Store the x(or w) if w(or x) requires gradient.
-
-        Args:
-            x (CTensor): matrix
-            w (CTensor): matrix
-
-        Returns:
-            a CTensor for the result
-        '''
-        self.input = (x, w)
-        return singa.Mult(x, w)
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy (CTensor): data for the dL / dy, L is the loss
-
-        Returns:
-            a tuple for (dx, dw)
-        '''
-        return singa.Mult(dy, self.input[1].T()), \
-            singa.Mult(self.input[0].T(), dy)
-
-
-def matmul(x, w):
-    return Matmul()(x, w)[0]
-
-
-class AddBias(Operation):
-    '''
-    Add Bias to each row / column of the Tensor, depending on the parameter axis.
-    '''
-
-    def __init__(self, axis=0):
-        '''
-        To indicate the calculation axis, 0 for row, 1 for column.
-
-        Args:
-            axis: 0 or 1, default is 0.
-        '''
-        self.axis = axis
-
-    def forward(self, x, b):
-        '''
-        Args:
-            x: matrix.
-            b: bias to be added.
-
-        Return:
-            the result Tensor
-        '''
-        if self.axis == 0:
-            singa.AddRow(b, x)
-        elif self.axis == 1:
-            singa.AddColumn(b, x)
-        return x
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy (CTensor): data for the dL / dy, L is the loss.
-
-        Return:
-            a tuple for (db, dx), db is data for dL / db, dx is data
-            for dL / dx.
-        '''
-        if self.axis == 0:
-            return dy, singa.Sum(dy, 0)
-        elif self.axis == 1:
-            return dy, singa.Sum(dy, 0)
-
-
-def add_bias(x, b, axis=0):
-    return AddBias(axis)(x, b)[0]
-
-
-class SoftMax(Operation):
-    '''
-    Apply SoftMax for each row of the Tensor or each column of the Tensor
-    according to the parameter axis.
-    '''
-
-    def __init__(self, axis=0):
-        self.axis = axis
-
-    def forward(self, x):
-        '''
-        Args:
-            x(data): the input 1d or 2d tensor
-
-        Returns:
-            the result Tensor
-        '''
-        if self.axis == 1:
-            x = x.T()
-        self.output = singa.SoftMax(x)
-        if self.axis == 0:
-            return self.output
-        elif self.axis == 1:
-            return self.output.T()
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy (CTensor): data for the dL / dy, L is the loss
-
-        Returns:
-            dx (Ctensor): data for the dL / dx, L is the loss,
-            x is the input of current Opertion
-        '''
-        # calculations are made on numpy array
-        if self.axis == 1:
-            dy = dy.T()
-        grad = ctensor2numpy(dy)
-        output = ctensor2numpy(self.output)
-        out_1 = np.einsum('ki,ki->ki', grad, output)
-        medium_out = np.einsum('ki,kj->kij', output, output)
-        out_2 = np.einsum('kij,kj->ki', medium_out, grad)
-        out = out_1 - out_2
-        dx = CTensor(out_1.shape)
-        dx.CopyFloatDataFromHostPtr(out.flatten())
-        if self.axis == 0:
-            return dx
-        elif self.axis == 1:
-            return dx.T()
-
-
-def soft_max(x, axis=0):
-    return SoftMax(axis)(x)[0]
-
-
-class CrossEntropy(Operation):
-    '''
-    Calculte CrossEntropy loss for a batch of training data.
-
-    '''
-
-    def forward(self, x, t):
-        '''
-        Args:
-            x (CTensor): 1d or 2d tensor, the prediction data(output) of current network.
-            t (CTensor): 1d or 2d tensor, the target data for training.
-
-        Returns:
-            loss (CTensor): scalar.
-        '''
-        loss = CTensor((1,))
-        loss_data = -singa.SumAsFloat(singa.__mul__(t, singa.Log(x)))
-        loss.SetFloatValue(loss_data / x.shape()[0])
-        self.x = x
-        self.t = t
-        self.input = (x, t)
-        return loss
-
-    def backward(self, dy=1.0):
-        '''
-        Args:
-            dy (float or CTensor): scalar, accumulate gradient from outside of current network, usually
-            equal to 1.0
-
-        Returns:
-            dx (CTensor): data for the dL /dx, L is the loss, x is the output of current network.
-            note that this is true for dy = 1.0
-        '''
-        dx = singa.__div__(self.t, self.x)
-        dx *= float(-1 / self.x.shape()[0])
-        if isinstance(dy, float):
-            # dtype of dy: float
-            dx *= dy
-            return dx, None
-        elif isinstance(dy, CTensor):
-            pass  # TODO, broadcast elementwise multiply seems not support
-
-
-def cross_entropy(y, t):
-    return CrossEntropy()(y, t)[0]
-
-
-def ctensor2numpy(x):
-    '''
-    To be used in SoftMax Operation.
-    Convert a singa_tensor to numpy_tensor.
-    '''
-    np_array = x.GetFloatValue(int(x.Size()))
-    return np_array.reshape(x.shape())