Posted to commits@singa.apache.org by mo...@apache.org on 2018/05/18 04:52:16 UTC

[10/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd

SINGA-349 Create layer operations for autograd

1. Modified the newly designed API and fixed bugs.
   It now works on my machine; see the usage sketch in item 3 below.

2. Relocated some code:
   - Integrated operations into autograd.py
   - Deleted extra files
   - Deleted operations from tensor.py
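3. Usage sketch (illustrative only, not part of this patch): a minimal forward
   pass composed from the relocated functional operations, assuming the
   tensor.Tensor constructor and the autograd machinery behave as in this commit.

       from singa import tensor
       from singa import autograd

       # toy batch: 3 samples, 4 features, 2 classes
       x = tensor.Tensor((3, 4))
       x.gaussian(0.0, 1.0)
       t = tensor.Tensor((3, 2))
       t.set_value(0.5)

       # trainable parameters
       w = tensor.Tensor((4, 2), requires_grad=True, stores_grad=True)
       w.gaussian(0.0, 0.1)
       b = tensor.Tensor((2,), requires_grad=True, stores_grad=True)
       b.set_value(0.0)

       # forward pass built from the operations added to autograd.py
       y = autograd.matmul(x, w)
       y = autograd.add_bias(y, b)
       y = autograd.relu(y)
       y = autograd.soft_max(y)
       loss = autograd.cross_entropy(y, t)

   The layer-backed operations (Conv2d, MaxPool2d, ReLU_Layer, Flatten, Linear)
   are constructed once and then called on a Tensor, e.g. y = Conv2d(3, 32)(x);
   gradients are obtained from the autograd backward machinery in this module
   (not shown in this hunk).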


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f70f1201
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f70f1201
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f70f1201

Branch: refs/heads/master
Commit: f70f120119744015b6f9f670ff4596fd2fc920b7
Parents: b136ac0
Author: xuewanqi <36...@users.noreply.github.com>
Authored: Tue May 8 15:33:34 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 python/singa/autograd.py  | 527 ++++++++++++++++++++++++++++++++++++++++-
 python/singa/layer_ops.py | 287 ----------------------
 python/singa/tensor.py    | 224 +-----------------
 3 files changed, 527 insertions(+), 511 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f70f1201/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 175f8b2..b55b7eb 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -1,6 +1,531 @@
+
+from __future__ import division
+
+from functools import reduce
 from collections import Counter, deque
-from .tensor import Tensor, Dummy
+from .tensor import Tensor, Dummy, Operation
+
+from singa import layer
+from singa.proto import model_pb2
+from . import singa_wrap as singa
+
+import numpy as np
+
+CTensor = singa.Tensor
+
+
+class ReLU(Operation):
+
+    def forward(self, x):
+        '''
+        Args:
+            x(CTensor): input tensor
+
+        Returns:
+            a new CTensor whose elements y = x if x >= 0, otherwise 0
+        '''
+        self.input = x
+        return singa.ReLU(x)
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy(CTensor): dL / dy
+
+        Returns:
+            dx (CTensor): dL / dx = dy if x >= 0, otherwise 0
+        '''
+        dx = singa.GTFloat(self.input, 0.0)
+        return singa.__mul__(dy, dx)
+
+
+def relu(x):
+    return ReLU()(x)[0]
+
+
+class Matmul(Operation):
+    '''For matrix multiplication'''
+
+    def forward(self, x, w):
+        '''Do forward propagation.
+
+        Store x (or w) if w (or x) requires gradient.
+
+        Args:
+            x (CTensor): matrix
+            w (CTensor): matrix
+
+        Returns:
+            a CTensor for the result
+        '''
+        self.input = (x, w)
+        return singa.Mult(x, w)
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss
+
+        Returns:
+            a tuple for (dx, dw)
+        '''
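+        # dx = dy * w^T, dw = x^T * dy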
+        return singa.Mult(dy, self.input[1].T()), \
+            singa.Mult(self.input[0].T(), dy)
+
+
+def matmul(x, w):
+    return Matmul()(x, w)[0]
+
+
+class AddBias(Operation):
+    '''
+    Add Bias to each row / column of the Tensor, depending on the parameter axis.
+    '''
+
+    def __init__(self, axis=0):
+        '''
+        To indicate the calculation axis, 0 for row, 1 for column.
+
+        Args:
+            axis: 0 or 1, default is 0.
+        '''
+        self.axis = axis
+
+    def forward(self, x, b):
+        '''
+        Args:
+            x: matrix.
+            b: bias to be added.
+
+        Return:
+            the result Tensor
+        '''
+        if self.axis == 0:
+            singa.AddRow(b, x)
+        elif self.axis == 1:
+            singa.AddColumn(b, x)
+        return x
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss.
+
+        Return:
+            a tuple for (db, dx), db is data for dL / db, dx is data
+            for dL / dx.
+        '''
+        if self.axis == 0:
+            return dy, singa.Sum(dy, 0)
+        elif self.axis == 1:
+            return dy, singa.Sum(dy, 0)
+
+
+def add_bias(x, b, axis=0):
+    return AddBias(axis)(x, b)[0]
+
+
+class SoftMax(Operation):
+    '''
+    Apply SoftMax to each row or each column of the Tensor,
+    according to the parameter axis.
+    '''
+
+    def __init__(self, axis=0):
+        self.axis = axis
+
+    def forward(self, x):
+        '''
+        Args:
+            x (CTensor): the input 1d or 2d tensor
+
+        Returns:
+            the result Tensor
+        '''
+        if self.axis == 1:
+            x = x.T()
+        self.output = singa.SoftMax(x)
+        if self.axis == 0:
+            return self.output
+        elif self.axis == 1:
+            return self.output.T()
+
+    def backward(self, dy):
+        '''
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss
+
+        Returns:
+            dx (CTensor): data for dL / dx, where L is the loss and
+            x is the input of the current Operation
+        '''
+        # calculations are made on numpy array
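+        # row-wise softmax gradient: dL/dx_i = y_i * (dL/dy_i - sum_j dL/dy_j * y_j)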
+        if self.axis == 1:
+            dy = dy.T()
+        grad = ctensor2numpy(dy)
+        output = ctensor2numpy(self.output)
+        out_1 = np.einsum('ki,ki->ki', grad, output)
+        medium_out = np.einsum('ki,kj->kij', output, output)
+        out_2 = np.einsum('kij,kj->ki', medium_out, grad)
+        out = out_1 - out_2
+        dx = CTensor(out_1.shape)
+        dx.CopyFloatDataFromHostPtr(out.flatten())
+        if self.axis == 0:
+            return dx
+        elif self.axis == 1:
+            return dx.T()
+
+
+def soft_max(x, axis=0):
+    return SoftMax(axis)(x)[0]
+
+
+class CrossEntropy(Operation):
+    '''
+    Calculate the cross-entropy loss for a batch of training data.
+
+    '''
+
+    def forward(self, x, t):
+        '''
+        Args:
+            x (CTensor): 1d or 2d tensor, the prediction data (output) of the current network.
+            t (CTensor): 1d or 2d tensor, the target data for training.
+
+        Returns:
+            loss (CTensor): scalar.
+        '''
+        loss = CTensor((1,))
+        loss_data = -singa.SumAsFloat(singa.__mul__(t, singa.Log(x)))
+        loss.SetFloatValue(loss_data / x.shape()[0])
+        self.x = x
+        self.t = t
+        self.input = (x, t)
+        return loss
+
+    def backward(self, dy=1.0):
+        '''
+        Args:
+            dy (float or CTensor): scalar, the gradient accumulated from outside
+            the current network, usually equal to 1.0
+
+        Returns:
+            dx (CTensor): data for dL / dx, where L is the loss and x is the
+            output of the current network; note that this holds for dy = 1.0
+        '''
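+        # for loss = -sum(t * log(x)) / n, dL/dx = -t / (n * x), with n the batch size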
+        dx = singa.__div__(self.t, self.x)
+        dx *= float(-1 / self.x.shape()[0])
+        if isinstance(dy, float):
+            # dtype of dy: float
+            dx *= dy
+            return dx, None
+        elif isinstance(dy, CTensor):
+            pass  # TODO: broadcasted element-wise multiply does not seem to be supported yet
+
+
+def cross_entropy(y, t):
+    return CrossEntropy()(y, t)[0]
+
+
+def ctensor2numpy(x):
+    '''
+    Used in the SoftMax Operation.
+    Convert a SINGA CTensor to a numpy array.
+    '''
+    np_array = x.GetFloatValue(int(x.Size()))
+    return np_array.reshape(x.shape())
+
+class Conv2d(Operation):
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs):
+
+        name = 'Conv2d'
+        border_mode = 'same'
+        cudnn_prefer = 'fastest'
+        workspace_byte_limit = 1024
+        data_format = 'NCHW'
+        W_specs = {'init': 'gaussian',
+                   'mean': 0.0,
+                   'std': 0.1}
+        b_specs = {'init': 'constant'}
+        input_sample_shape = None
+
+        inner_params = {'name':name,
+                          'border_mode':border_mode,
+                          'cudnn_prefer':cudnn_prefer,
+                          'workspace_byte_limit':workspace_byte_limit,
+                          'data_format':data_format,
+                          'W_specs':W_specs,
+                          'b_specs':b_specs,
+                          'input_sample_shape':input_sample_shape
+                          }
+        # TODO valid value of inner_params check
+
+        for kwarg in kwargs:
+            if kwarg not in inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                inner_params[kwarg] = kwargs[kwarg]
+
+
+        self.W_specs = inner_params['W_specs']
+        self.b_specs = inner_params['b_specs']
+
+        if padding == 0:
+            pad = None
+        else:
+            pad = padding
+
+        if dilation != 1 or groups != 1:
+            raise ValueError('Not implemented yet')
+
+        self.PyLayer = layer.Conv2D(inner_params['name'], nb_kernels=out_channels,
+                                    kernel=kernel_size, stride=stride,
+                                    border_mode=inner_params['border_mode'],
+                                    cudnn_prefer=inner_params['cudnn_prefer'],
+                                    workspace_byte_limit=inner_params['workspace_byte_limit'],
+                                    data_format=inner_params['data_format'], use_bias=bias,
+                                    W_specs=self.W_specs, b_specs=self.b_specs,
+                                    pad=pad, input_sample_shape=inner_params['input_sample_shape'])
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        param_data = self.PyLayer.layer.param_values()
+        if not hasattr(self, 'w'):
+            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'gaussian':
+                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
+                    self.w.gaussian(0.0, 0.1)
+                else:
+                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
+            elif self.W_specs['init'] == 'uniform':
+                if 'low' not in self.W_specs or 'high' not in self.W_specs:
+                    self.w.uniform(0.0, 1.0)
+                else:
+                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            elif self.W_specs['init'] == 'xavier':
+                pass  # TODO
+
+        xs = [x, self.w]
+
+        if len(param_data) == 2:
+            if not hasattr(self, 'b'):
+                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                if self.b_specs['init'] == 'gaussian':
+                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
+                        self.b.gaussian(0.0, 0.1)
+                    else:
+                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
+                elif self.b_specs['init'] == 'uniform':
+                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
+                        self.b.uniform(0.0, 1.0)
+                    else:
+                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
+                elif self.b_specs['init'] == 'xavier':
+                    pass  # TODO
+                elif self.b_specs['init'] == 'constant':
+                    self.b.set_value(0.0)
+
+            xs.append(self.b)
+
+        xs = tuple(xs)
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(0, dy)
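+        # ret[0] is dx; ret[1] holds the parameter gradients (dW and, if used, db)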
+        return (ret[0],)+ret[1]
+
+
+class MaxPool2d(Operation):
+    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+
+        name = 'MaxPool2d'
+        border_mode = 'same'
+        data_format = 'NCHW'
+        input_sample_shape = None
+
+        allowed_kwargs = {'name': name,
+                          'border_mode': border_mode,
+                          'data_format': data_format,
+                          'input_sample_shape': input_sample_shape
+                          }
+
+        for kwarg in kwargs:
+            if kwarg not in allowed_kwargs:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                allowed_kwargs[kwarg] = kwargs[kwarg]
+
+        if padding == 0:
+            pad = None
+        else:
+            pad = padding
+
+        if dilation != 1 or return_indices is not False or ceil_mode is not False:
+            raise ValueError('Not implemented yet')
+
+        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
+                                           kernel_size, stride, border_mode,
+                                           pad, data_format, input_sample_shape)
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        return self._do_forward(x)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class ReLU_Layer(Operation):
+    def __init__(self, name='ReLU', mode='relu', input_sample_shape=None):
+        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class Flatten(Operation):
+    def __init__(self, name='Flatten', axis=1, input_sample_shape=None):
+        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+        return self._do_forward(x)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
+
+    def backward(self, dy):
+        return self.PyLayer.layer.Backward(0, dy)[0]
+
+
+class Linear(Operation):
+    def __init__(self, in_features, out_features, bias=True, **kwargs):
+
+        name = 'Linear'
+        W_transpose = False
+        W_specs = {'init': 'gaussian',
+                   'mean': 0.0,
+                   'std': 0.1}
+        b_specs = {'init': 'constant'}
+        input_sample_shape = in_features
+
+        inner_params = {'name': name,
+                          'W_transpose': W_transpose,
+                          'W_specs': W_specs,
+                          'b_specs': b_specs,
+                          'input_sample_shape': input_sample_shape
+                          }
+
+        # TODO valid value of inner_params check
+
+        for kwarg in kwargs:
+            if kwarg not in inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                inner_params[kwarg] = kwargs[kwarg]
+
+        self.W_specs = inner_params['W_specs']
+        self.b_specs = inner_params['b_specs']
+
+        self.PyLayer = layer.Dense(inner_params['name'], num_output=out_features,
+                                   use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
+                                   W_transpose=inner_params['W_transpose'],
+                                   input_sample_shape=inner_params['input_sample_shape'])
+
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        if flag:
+            self.flag = model_pb2.kTrain
+        else:
+            self.flag = model_pb2.kEval
+
+        if not self.PyLayer.has_setup:
+            self.PyLayer.setup(x.shape[1:])
+
+        param_data = self.PyLayer.layer.param_values()
+        if not hasattr(self, 'w'):
+            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'gaussian':
+                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
+                    self.w.gaussian(0.0, 0.1)
+                else:
+                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
+            elif self.W_specs['init'] == 'uniform':
+                if 'low' not in self.W_specs or 'high' not in self.W_specs:
+                    self.w.uniform(0.0, 1.0)
+                else:
+                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
+            elif self.W_specs['init'] == 'xavier':
+                pass  # TODO
+
+        xs = [x, self.w]
+
+        if len(param_data) == 2:
+            if not hasattr(self, 'b'):
+                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                if self.b_specs['init'] == 'gaussian':
+                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
+                        self.b.gaussian(0.0, 0.1)
+                    else:
+                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
+                elif self.b_specs['init'] == 'uniform':
+                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
+                        self.b.uniform(0.0, 1.0)
+                    else:
+                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
+                elif self.b_specs['init'] == 'xavier':
+                    pass  # TODO
+                elif self.b_specs['init'] == 'constant':
+                    self.b.set_value(0.0)
+
+            xs.append(self.b)
+
+        xs = tuple(xs)
+        return self._do_forward(*xs)
+
+    def forward(self, *xs):
+        return self.PyLayer.layer.Forward(self.flag, xs[0])
 
+    def backward(self, dy):
+        ret = self.PyLayer.layer.Backward(0, dy)
+        return (ret[0],)+ret[1]
 
 def infer_dependency(op):
     '''

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f70f1201/python/singa/layer_ops.py
----------------------------------------------------------------------
diff --git a/python/singa/layer_ops.py b/python/singa/layer_ops.py
deleted file mode 100644
index dcbacf9..0000000
--- a/python/singa/layer_ops.py
+++ /dev/null
@@ -1,287 +0,0 @@
-from singa import tensor
-from singa import layer
-from singa.proto import model_pb2
-
-
-class Conv2d(tensor.Operation):
-    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs):
-
-        name='Conv2d'
-        border_mode = 'same'
-        cudnn_prefer = 'fastest'
-        workspace_byte_limit = 1024
-        data_format = 'NCHW'
-        W_specs = None
-        b_specs = None
-        input_sample_shape=None
-
-        allowed_kwargs = {'name':name,
-                          'border_mode':border_mode,
-                          'cudnn_prefer':cudnn_prefer,
-                          'workspace_byte_limit':workspace_byte_limit,
-                          'data_format':data_format,
-                          'W_specs':W_specs,
-                          'b_specs':b_specs,
-                          'input_sample_shape':input_sample_shape
-                          }
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
-
-        self.W_specs=W_specs
-        self.b_specs=b_specs
-
-        if padding == 0:
-            pad = None
-        else:
-            pad = padding
-
-        if dilation != 1 or groups != 1:
-            raise ValueError('Not implemented yet')
-
-        self.PyLayer = layer.Conv2D(name, nb_kernels=out_channels, kernel=kernel_size, stride=stride, border_mode=border_mode,
-                 cudnn_prefer=cudnn_prefer, workspace_byte_limit=workspace_byte_limit,
-                 data_format=data_format, use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
-                 pad=pad, input_sample_shape=input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        param_data = self.PyLayer.layer.param_values()
-        if not hasattr(self, 'w'):
-            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
-                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
-                    self.w.gaussian(0.0, 0.1)
-                else:
-                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
-            elif self.W_specs['init'] == 'uniform':
-                if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
-                else:
-                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass  # TODO
-
-        xs = [x, self.w]
-
-        if len(param_data) == 2:
-            if not hasattr(self, 'b'):
-                self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-                if self.b_specs['init'] == 'gaussian':
-                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
-                        self.b.gaussian(0.0, 0.1)
-                    else:
-                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
-                elif self.b_specs['init'] == 'uniform':
-                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
-                    else:
-                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass  # TODO
-                else:
-                    self.b.set_value(0.0)
-
-            xs.append(self.b)
-
-        xs = tuple(xs)
-        return self._do_forward(*xs)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]
-
-
-class MaxPool2d(tensor.Operation):
-    def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
-
-        name = 'MaxPool2d'
-        border_mode = 'same'
-        data_format = 'NCHW'
-        input_sample_shape = None
-
-        allowed_kwargs = {'name': name,
-                          'border_mode': border_mode,
-                          'data_format': data_format,
-                          'input_sample_shape': input_sample_shape
-                          }
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
-
-        if padding == 0:
-            pad = None
-        else:
-            pad = padding
-
-        if dilation != 1 or return_indices is not False or ceil_mode is not False:
-            raise ValueError('Not implemented yet')
-
-        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
-                                           kernel_size, stride, border_mode,
-                                           pad, data_format, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        return self._do_forward(x)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
-
-
-class ReLU(tensor.Operation):
-    def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
-        self.PyLayer = layer.Activation(name, mode, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, flag=True, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
-
-
-class Flatten(tensor.Operation):
-    def __init__(self, name, axis=1, input_sample_shape=None):
-        self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-        return self._do_forward(x)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
-
-
-class Linear(tensor.Operation):
-    def __init__(self, in_features, out_features, bias=True, **kwargs):
-
-        name = 'Linear'
-        W_transpose=False
-        W_specs = None
-        b_specs = None
-        input_sample_shape = in_features
-
-        allowed_kwargs = {'name': name,
-                          'W_transpose': W_transpose,
-                          'W_specs': W_specs,
-                          'b_specs': b_specs,
-                          'input_sample_shape': input_sample_shape
-                          }
-
-        for kwarg in kwargs:
-            if kwarg not in allowed_kwargs:
-                raise TypeError('Keyword argument not understood:', kwarg)
-            else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
-
-        self.W_specs = W_specs
-        self.b_specs = b_specs
-
-        self.PyLayer = layer.Dense(name, num_output=out_features, use_bias=bias,
-                     W_specs=self.W_specs, b_specs=self.b_specs,
-                     W_transpose=W_transpose, input_sample_shape=input_sample_shape)
-
-    def __call__(self, x, flag=True):
-        assert type(flag) is bool, 'flag can only be bool.'
-        if flag:
-            self.flag = model_pb2.kTrain
-        else:
-            self.flag = model_pb2.kEval
-
-        if not self.PyLayer.has_setup:
-            self.PyLayer.setup(x.shape[1:])
-
-        param_data = self.PyLayer.layer.param_values()
-        if not hasattr(self, 'w'):
-            self.w = tensor.Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
-                if 'std' not in self.W_specs or 'mean' not in self.W_specs:
-                    self.w.gaussian(0.0, 0.1)
-                else:
-                    self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
-            elif self.W_specs['init'] == 'uniform':
-                if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
-                else:
-                    self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass  # TODO
-
-        xs = [x, self.w]
-
-        if len(param_data) == 2:
-            if not hasattr(self, 'b'):
-                self.b = tensor.Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
-                if self.b_specs['init'] == 'gaussian':
-                    if 'std' not in self.b_specs or 'mean' not in self.b_specs:
-                        self.b.gaussian(0.0, 0.1)
-                    else:
-                        self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
-                elif self.b_specs['init'] == 'uniform':
-                    if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
-                    else:
-                        self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass  # TODO
-                else:
-                    self.b.set_value(0.0)
-
-            xs.append(self.b)
-
-        xs = tuple(xs)
-        return self._do_forward(*xs)
-
-    def forward(self, *xs):
-        return self.PyLayer.layer.Forward(self.flag, xs[0])
-
-    def backward(self, dy):
-        ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f70f1201/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index fcf00b8..ff43cc6 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -97,7 +97,7 @@ class Tensor(object):
             copy_from_numpy(self.data, data)
         elif isinstance(data, CTensor):
             self.data = data
-            assert data.device == device, 'not the same device'
+            assert data.device() == device, 'not the same device'
         else:
             self.data = CTensor(list(shape), device, dtype)
 
@@ -1356,225 +1356,3 @@ class Dummy(Operation):
         self.src = []
         self.y_id2idx = {id(tensor): 0}
         self.requires_grad = False
-
-
-class ReLU(Operation):
-
-    def forward(self, x):
-        '''
-        Args:
-            x(CTensor): input tensor
-
-        Returns:
-            a new CTensor whose element y = x if x >= 0; otherwise 0;
-        '''
-        self.input = x
-        return singa.ReLU(x)
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy(CTensor): dL / dy
-
-        Returns:
-            dx(CTensor): dL / dx = dy if x >= 0; otherwise 0;
-        '''
-        dx = singa.GTFloat(self.input, 0.0)
-        return singa.__mul__(dy, dx)
-
-
-def relu(x):
-    return ReLU()(x)[0]
-
-
-class Matmul(Operation):
-    '''For matrix multiplication'''
-
-    def forward(self, x, w):
-        '''Do forward propgation.
-
-        Store the x(or w) if w(or x) requires gradient.
-
-        Args:
-            x (CTensor): matrix
-            w (CTensor): matrix
-
-        Returns:
-            a CTensor for the result
-        '''
-        self.input = (x, w)
-        return singa.Mult(x, w)
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy (CTensor): data for the dL / dy, L is the loss
-
-        Returns:
-            a tuple for (dx, dw)
-        '''
-        return singa.Mult(dy, self.input[1].T()), \
-            singa.Mult(self.input[0].T(), dy)
-
-
-def matmul(x, w):
-    return Matmul()(x, w)[0]
-
-
-class AddBias(Operation):
-    '''
-    Add Bias to each row / column of the Tensor, depending on the parameter axis.
-    '''
-
-    def __init__(self, axis=0):
-        '''
-        To indicate the calculation axis, 0 for row, 1 for column.
-
-        Args:
-            axis: 0 or 1, default is 0.
-        '''
-        self.axis = axis
-
-    def forward(self, x, b):
-        '''
-        Args:
-            x: matrix.
-            b: bias to be added.
-
-        Return:
-            the result Tensor
-        '''
-        if self.axis == 0:
-            singa.AddRow(b, x)
-        elif self.axis == 1:
-            singa.AddColumn(b, x)
-        return x
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy (CTensor): data for the dL / dy, L is the loss.
-
-        Return:
-            a tuple for (db, dx), db is data for dL / db, dx is data
-            for dL / dx.
-        '''
-        if self.axis == 0:
-            return dy, singa.Sum(dy, 0)
-        elif self.axis == 1:
-            return dy, singa.Sum(dy, 0)
-
-
-def add_bias(x, b, axis=0):
-    return AddBias(axis)(x, b)[0]
-
-
-class SoftMax(Operation):
-    '''
-    Apply SoftMax for each row of the Tensor or each column of the Tensor
-    according to the parameter axis.
-    '''
-
-    def __init__(self, axis=0):
-        self.axis = axis
-
-    def forward(self, x):
-        '''
-        Args:
-            x(data): the input 1d or 2d tensor
-
-        Returns:
-            the result Tensor
-        '''
-        if self.axis == 1:
-            x = x.T()
-        self.output = singa.SoftMax(x)
-        if self.axis == 0:
-            return self.output
-        elif self.axis == 1:
-            return self.output.T()
-
-    def backward(self, dy):
-        '''
-        Args:
-            dy (CTensor): data for the dL / dy, L is the loss
-
-        Returns:
-            dx (Ctensor): data for the dL / dx, L is the loss,
-            x is the input of current Opertion
-        '''
-        # calculations are made on numpy array
-        if self.axis == 1:
-            dy = dy.T()
-        grad = ctensor2numpy(dy)
-        output = ctensor2numpy(self.output)
-        out_1 = np.einsum('ki,ki->ki', grad, output)
-        medium_out = np.einsum('ki,kj->kij', output, output)
-        out_2 = np.einsum('kij,kj->ki', medium_out, grad)
-        out = out_1 - out_2
-        dx = CTensor(out_1.shape)
-        dx.CopyFloatDataFromHostPtr(out.flatten())
-        if self.axis == 0:
-            return dx
-        elif self.axis == 1:
-            return dx.T()
-
-
-def soft_max(x, axis=0):
-    return SoftMax(axis)(x)[0]
-
-
-class CrossEntropy(Operation):
-    '''
-    Calculte CrossEntropy loss for a batch of training data.
-
-    '''
-
-    def forward(self, x, t):
-        '''
-        Args:
-            x (CTensor): 1d or 2d tensor, the prediction data(output) of current network.
-            t (CTensor): 1d or 2d tensor, the target data for training.
-
-        Returns:
-            loss (CTensor): scalar.
-        '''
-        loss = CTensor((1,))
-        loss_data = -singa.SumAsFloat(singa.__mul__(t, singa.Log(x)))
-        loss.SetFloatValue(loss_data / x.shape()[0])
-        self.x = x
-        self.t = t
-        self.input = (x, t)
-        return loss
-
-    def backward(self, dy=1.0):
-        '''
-        Args:
-            dy (float or CTensor): scalar, accumulate gradient from outside of current network, usually
-            equal to 1.0
-
-        Returns:
-            dx (CTensor): data for the dL /dx, L is the loss, x is the output of current network.
-            note that this is true for dy = 1.0
-        '''
-        dx = singa.__div__(self.t, self.x)
-        dx *= float(-1 / self.x.shape()[0])
-        if isinstance(dy, float):
-            # dtype of dy: float
-            dx *= dy
-            return dx, None
-        elif isinstance(dy, CTensor):
-            pass  # TODO, broadcast elementwise multiply seems not support
-
-
-def cross_entropy(y, t):
-    return CrossEntropy()(y, t)[0]
-
-
-def ctensor2numpy(x):
-    '''
-    To be used in SoftMax Operation.
-    Convert a singa_tensor to numpy_tensor.
-    '''
-    np_array = x.GetFloatValue(int(x.Size()))
-    return np_array.reshape(x.shape())