Posted to commits@singa.apache.org by mo...@apache.org on 2018/05/18 04:52:18 UTC
[12/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd
SINGA-349 Create layer operations for autograd
clean the code and add comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d7d629b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d7d629b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d7d629b
Branch: refs/heads/master
Commit: 6d7d629bf00437c23a67f1de3b1ed085764e9492
Parents: 403843d
Author: Wang Wei <dc...@nus.edu.sg>
Authored: Thu May 17 21:18:31 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800
----------------------------------------------------------------------
examples/autograd/mnist_cnn.py | 41 ++++----
python/singa/autograd.py | 200 +++++++++++++++++++-----------------
2 files changed, 126 insertions(+), 115 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d7d629b/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 7afbb9e..3ab8b62 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -31,18 +31,18 @@ def to_categorical(y, num_classes):
n = y.shape[0]
categorical = np.zeros((n, num_classes))
categorical[np.arange(n), y] = 1
- categorical=categorical.astype(np.float32)
+ categorical = categorical.astype(np.float32)
return categorical
def preprocess(data):
- data=data.astype(np.float32)
+ data = data.astype(np.float32)
data /= 255
- data=np.expand_dims(data, axis=1)
+ data = np.expand_dims(data, axis=1)
return data
-def accuracy(pred,target):
+def accuracy(pred, target):
y = np.argmax(pred, axis=1)
t = np.argmax(target, axis=1)
a = y == t
@@ -55,8 +55,9 @@ if __name__ == '__main__':
parser.add_argument('file_path', type=str, help='the dataset path')
args = parser.parse_args()
- assert os.path.exists(args.file_path), 'Pls download the MNIST dataset from' \
- 'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
+ assert os.path.exists(args.file_path), \
+ 'Pls download the MNIST dataset from ' \
+ 'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
train, test = load_data(args.file_path)
@@ -69,18 +70,17 @@ if __name__ == '__main__':
x_train = preprocess(train[0])
y_train = to_categorical(train[1], num_classes)
- x_test=preprocess(test[0])
- y_test=to_categorical(test[1],num_classes)
- print ('the shape of training data is', x_train.shape)
- print ('the shape of training label is', y_train.shape)
- print ('the shape of testing data is', x_test.shape)
- print ('the shape of testing label is', y_test.shape)
+ x_test = preprocess(test[0])
+ y_test = to_categorical(test[1], num_classes)
+ print('the shape of training data is', x_train.shape)
+ print('the shape of training label is', y_train.shape)
+ print('the shape of testing data is', x_test.shape)
+ print('the shape of testing label is', y_test.shape)
# operations initialization
conv1 = autograd.Conv2d(3, 32)
conv2 = autograd.Conv2d(32, 32)
- linear = autograd.Linear(32*28*28, 10)
-
+ linear = autograd.Linear(32 * 28 * 28, 10)
def forward(x, t):
y = conv1(x)
@@ -94,18 +94,21 @@ if __name__ == '__main__':
loss = autograd.cross_entropy(y, t)
return loss, y
+ autograd.training = True
for epoch in range(epochs):
for i in range(batch_number):
- inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
- targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+ inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :])
+ targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :])
loss, y = forward(inputs, targets)
- accuracy_rate = accuracy(autograd.ctensor2numpy(y.data),autograd.ctensor2numpy(targets.data))
+ accuracy_rate = accuracy(autograd.ctensor2numpy(
+ y.data), autograd.ctensor2numpy(targets.data))
if (i % 5 == 0):
- print('accuracy is:', accuracy_rate,'loss is:', autograd.ctensor2numpy(loss.data)[0])
+ print('accuracy is:', accuracy_rate, 'loss is:',
+ autograd.ctensor2numpy(loss.data)[0])
in_grads = autograd.backward(loss)
for param in in_grads:
- sgd.apply(0, in_grads[param], param, '')
\ No newline at end of file
+ sgd.apply(0, in_grads[param], param, '')
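The main behavioral change in this example is the module-level switch: instead of threading a boolean flag through every operation call, the loop sets autograd.training = True once before training. A minimal evaluation sketch under that convention (hypothetical: this commit adds no eval loop; the names reuse the helpers defined in the example above):

    autograd.training = False  # operations stop caching inputs for backward
    test_inputs = tensor.Tensor(data=x_test[:100, :])
    test_targets = tensor.Tensor(data=y_test[:100, :])
    loss, y = forward(test_inputs, test_targets)
    print('test accuracy:', accuracy(autograd.ctensor2numpy(y.data),
                                     autograd.ctensor2numpy(test_targets.data)))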
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d7d629b/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index de3dc92..fc438ab 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -1,18 +1,18 @@
from __future__ import division
-from functools import reduce
from collections import Counter, deque
-from .tensor import Tensor
+import numpy as np
+import math
-from singa import layer
+from .tensor import Tensor
+from . import layer
from singa.proto import model_pb2
from . import singa_wrap as singa
-import numpy as np
-import math
CTensor = singa.Tensor
+training = False
class Operation(object):
@@ -20,12 +20,16 @@ class Operation(object):
An operation includes the forward and backward function of
tensor calculation.
- To add a specific operation Xxxx, subclass Operation and implement
- forward() and backward(). Then implement a function xxxx which creates
- a Xxxx instance and calls __call__ to do forward. The autograd engine
- is able to do backward propagation by calling the backward() of Xxxx
- automatically. Notice that the tensors are CTensor. NOT Python Tensor.
- The arguments of forward() and backward() should only include CTensor args;
+ Steps to add a specific operation Xxxx:
+ 1. create a subclass of Operation and name it Xxxx
+ 2. if Xxxx is implemented using other Operations, then override
+ the _do_forward() function;
+ if Xxxx is implemented using CTensor operations,
+ then override forward() and backward(); the arguments of forward()
+ and backward() should only include CTensor;
+ if Xxxx is implemented by calling functions in layer.py, then override
+ __call__(), forward() and backward(). TODO(wangwei) avoid this complex
+ case.
'''
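To make case 2 concrete, here is a minimal sketch of a hypothetical Square operation (y = x * x, element-wise); it is not part of this commit, and it assumes singa_wrap exposes an element-wise singa.__mul__, by analogy with the singa.__div__ call used in CrossEntropy below:

    class Square(Operation):
        '''Hypothetical example: y = x * x, element-wise.'''

        def forward(self, x):
            if training:
                self.input = x  # cache input for backward, as ReLU does
            return singa.__mul__(x, x)  # assumed binding, see note above

        def backward(self, dy):
            # dL/dx = 2 * x * dy, using the input cached during forward
            dx = singa.__mul__(dy, self.input)
            dx *= 2.0  # CTensor supports in-place scaling, cf. CrossEntropy
            return dx

    def square(x):
        return Square()(x)[0]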
def __call__(self, *xs):
@@ -103,6 +107,9 @@ class Operation(object):
'''
raise NotImplementedError
+ def get_params(self):
+ return []
+
class Dummy(Operation):
'''Dummy operation which serves as a placeholder for autograd
@@ -119,10 +126,6 @@ class Dummy(Operation):
class ReLU(Operation):
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- self.flag=flag
- return self._do_forward(x)
def forward(self, x):
'''
@@ -132,7 +135,7 @@ class ReLU(Operation):
Returns:
a new CTensor whose element y = x if x >= 0; otherwise 0;
'''
- if self.flag:
+ if training:
self.input = x
return singa.ReLU(x)
@@ -154,10 +157,6 @@ def relu(x):
class Matmul(Operation):
'''For matrix multiplication'''
- def __call__(self, x, w, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- self.flag=flag
- return self._do_forward(x, w)
def forward(self, x, w):
'''Do forward propagation.
@@ -171,7 +170,7 @@ class Matmul(Operation):
Returns:
a CTensor for the result
'''
- if self.flag:
+ if training:
self.input = (x, w)
return singa.Mult(x, w)
@@ -187,13 +186,13 @@ class Matmul(Operation):
singa.Mult(self.input[0].T(), dy)
-def matmul(x, w, flag=True):
- return Matmul()(x, w, flag)[0]
+def matmul(x, w):
+ return Matmul()(x, w)[0]
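The two products returned by Matmul.backward are the standard matrix-calculus identities; writing the forward pass as y = xw with upstream gradient dy = \partial L / \partial y:

    \frac{\partial L}{\partial x} = dy \, w^{\top}, \qquad
    \frac{\partial L}{\partial w} = x^{\top} dy

the second of which is the singa.Mult(self.input[0].T(), dy) visible in the hunk above, applied to the inputs cached during forward.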
class AddBias(Operation):
'''
- Add Bias to each row / column of the Tensor, depending on the parameter axis.
+ Add Bias to each row / column of the Tensor, depending on the axis arg.
'''
def __init__(self, axis=0):
@@ -303,7 +302,8 @@ class CrossEntropy(Operation):
def forward(self, x, t):
'''
Args:
- x (CTensor): 1d or 2d tensor, the prediction data(output) of current network.
+ x (CTensor): 1d or 2d tensor, the prediction data (output)
+ of current network.
t (CTensor): 1d or 2d tensor, the target data for training.
Returns:
@@ -320,12 +320,13 @@ class CrossEntropy(Operation):
def backward(self, dy=1.0):
'''
Args:
- dy (float or CTensor): scalar, accumulate gradient from outside of current network, usually
- equal to 1.0
+ dy (float or CTensor): scalar, accumulated gradient from outside
+ of current network, usually equal to 1.0
Returns:
- dx (CTensor): data for the dL /dx, L is the loss, x is the output of current network.
- note that this is true for dy = 1.0
+ dx (CTensor): data for dL/dx, where L is the loss and x is the
+ output of current network. Note that this is true
+ for dy = 1.0
'''
dx = singa.__div__(self.t, self.x)
dx *= float(-1 / self.x.shape()[0])
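For reference, this backward implements the textbook gradient of batch-averaged cross entropy; with batch size n, prediction x and target t:

    L = -\frac{1}{n} \sum_i t_i \log x_i
    \quad\Rightarrow\quad
    \frac{\partial L}{\partial x_i} = -\frac{t_i}{n \, x_i}

hence the element-wise division singa.__div__(self.t, self.x) scaled by -1/n, valid for dy = 1.0 as the docstring notes.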
@@ -351,17 +352,18 @@ def ctensor2numpy(x):
class Conv2d(Operation):
- def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True,
- **kwargs):
+
+ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
+ padding=0, dilation=1, groups=1, bias=True, **kwargs):
inner_params = {'name': 'Conv2d',
- 'border_mode': 'same',
- 'cudnn_prefer': 'fastest',
- 'workspace_byte_limit': 1024,
- 'data_format': 'NCHW',
- 'W_specs': {'init': 'xavier'},
- 'b_specs': {'init': 'constant'},
- 'input_sample_shape': None}
+ 'border_mode': 'same',
+ 'cudnn_prefer': 'fastest',
+ 'workspace_byte_limit': 1024,
+ 'data_format': 'NCHW',
+ 'W_specs': {'init': 'xavier'},
+ 'b_specs': {'init': 'constant'},
+ 'input_sample_shape': None}
# TODO valid value of inner_params check
for kwarg in kwargs:
@@ -369,7 +371,7 @@ class Conv2d(Operation):
raise TypeError('Keyword argument not understood:', kwarg)
else:
inner_params[kwarg] = kwargs[kwarg]
-
+
self.in_channels = in_channels
self.out_channels = out_channels
self.W_specs = inner_params['W_specs']
@@ -388,21 +390,30 @@ class Conv2d(Operation):
if dilation != 1 or groups != 1:
raise ValueError('Not implemented yet')
- self.PyLayer = layer.Conv2D(inner_params['name'], nb_kernels=out_channels, kernel=kernel_size, stride=stride,
+ self.PyLayer = layer.Conv2D(inner_params['name'],
+ nb_kernels=out_channels,
+ kernel=kernel_size,
+ stride=stride,
border_mode=inner_params['border_mode'],
- cudnn_prefer=inner_params['cudnn_prefer'], workspace_byte_limit=inner_params['workspace_byte_limit'],
- data_format=inner_params['data_format'], use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
- pad=pad, input_sample_shape=inner_params['input_sample_shape'])
+ cudnn_prefer=inner_params['cudnn_prefer'],
+ workspace_byte_limit=inner_params[
+ 'workspace_byte_limit'],
+ data_format=inner_params['data_format'],
+ use_bias=bias,
+ W_specs=self.W_specs,
+ b_specs=self.b_specs,
+ pad=pad,
+ input_sample_shape=inner_params['input_sample_shape'])
def get_params(self):
- assert self.has_setup, \
- 'Must call setup() before get_params()'
- params = self.PyLayer.layer.param_values()
- return params
-
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- if flag:
+ assert self.init_value is True, 'must initialize before get_params()'
+ if self.bias:
+ return (self.w, self.b)
+ else:
+ return self.w
+
+ def __call__(self, x):
+ if training:
self.flag = model_pb2.kTrain
else:
self.flag = model_pb2.kEval
@@ -413,15 +424,18 @@ class Conv2d(Operation):
param_data = self.PyLayer.layer.param_values()
if not hasattr(self, 'w'):
- self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
- std = math.sqrt(2.0/(self.in_channels*self.kernel_size[0]*self.kernel_size[1]+self.out_channels))
+ self.w = Tensor(device=param_data[0].device, data=param_data[
+ 0], requires_grad=True, stores_grad=True)
+ std = math.sqrt(
+ 2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] + self.out_channels))
self.w.gaussian(0.0, std)
xs = [x, self.w]
if len(param_data) == 2:
if not hasattr(self, 'b'):
- self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
+ self.b = Tensor(device=param_data[1].device, data=param_data[
+ 1], requires_grad=True, stores_grad=True)
self.b.set_value(0.0)
xs.append(self.b)
@@ -434,10 +448,11 @@ class Conv2d(Operation):
def backward(self, dy):
ret = self.PyLayer.layer.Backward(self.flag, dy)
- return (ret[0],)+ret[1]
+ return (ret[0],) + ret[1]
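The lazily initialized weight above is drawn with a Glorot-style standard deviation, sqrt(2 / (fan_in + out_channels)) where fan_in = in_channels * kh * kw. A quick worked example against the mnist script, assuming conv1 = autograd.Conv2d(3, 32) with the default 3x3 kernel:

    std = math.sqrt(2.0 / (3 * 3 * 3 + 32))  # sqrt(2 / 59) ≈ 0.184
    # self.w is then sampled from N(0, std**2); self.b is zero-initialized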
class Linear(Operation):
+
def __init__(self, in_features, out_features, bias=True):
self.in_features = in_features
self.out_features = out_features
@@ -453,47 +468,36 @@ class Linear(Operation):
else:
return self.w
- def init_params(self, w, b=None):
- if self.bias:
- assert b is not None, 'must initialize bias.'
- assert w.shape == self.w_shape, 'shape of parameters must match.'
- assert b.shape == self.b_shape, 'shape of parameters must match.'
- self.w = w
- self.b = b
- else:
- assert b is None, 'cannot initialize bias.'
- assert w.shape == self.w_shape, 'shape of parameters must match.'
- self.w = w
- self.init_value = True
- return
-
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
+ def __call__(self, x):
if self.init_value is False:
- self.w = Tensor(shape=self.w_shape, requires_grad=True, stores_grad=True)
+ self.w = Tensor(shape=self.w_shape,
+ requires_grad=True, stores_grad=True)
std = math.sqrt(2.0 / (self.in_features + self.out_features))
self.w.gaussian(0.0, std)
if self.bias:
- self.b = Tensor(shape=self.b_shape, requires_grad=True, stores_grad=True)
+ self.b = Tensor(shape=self.b_shape,
+ requires_grad=True, stores_grad=True)
self.b.set_value(0.0)
self.init_value = True
- y = matmul(x, self.w, flag)
+ y = matmul(x, self.w)
if self.bias:
y = add_bias(y, self.b, axis=0)
return y
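Since Linear now creates its parameters on first call, construction needs no device or input data up front. A minimal usage sketch (shapes chosen to match the mnist example; the dummy batch is illustrative only):

    from singa import tensor, autograd

    autograd.training = True
    linear = autograd.Linear(32 * 28 * 28, 10)  # w: (25088, 10), b: (10,)
    x = tensor.Tensor(shape=(4, 32 * 28 * 28))  # dummy batch of 4
    x.gaussian(0.0, 1.0)
    y = linear(x)  # first call creates w and b, then computes x w + b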
class MaxPool2d(Operation):
- def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+
+ def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1,
+ return_indices=False, ceil_mode=False, **kwargs):
inner_params = {'name': 'MaxPool2d',
- 'border_mode': 'same',
- 'data_format': 'NCHW',
- 'input_sample_shape': None
- }
+ 'border_mode': 'same',
+ 'data_format': 'NCHW',
+ 'input_sample_shape': None
+ }
for kwarg in kwargs:
- if kwarg not in allowed_kwargs:
+ if kwarg not in inner_params:
raise TypeError('Keyword argument not understood:', kwarg)
else:
inner_params[kwarg] = kwargs[kwarg]
@@ -503,16 +507,18 @@ class MaxPool2d(Operation):
else:
pad = padding
- if dilation != 1 or return_indices is not False or ceil_mode is not False:
+ if dilation != 1 or return_indices or ceil_mode:
raise ValueError('Not implemented yet')
- self.PyLayer = layer.Pooling2D(inner_params['name'], model_pb2.PoolingConf.MAX,
- kernel_size, stride, inner_params['border_mode'],
- pad, inner_params['data_format'], inner_params['input_sample_shape'])
+ self.PyLayer = layer.Pooling2D(inner_params['name'],
+ model_pb2.PoolingConf.MAX,
+ kernel_size, stride, inner_params[
+ 'border_mode'],
+ pad, inner_params['data_format'],
+ inner_params['input_sample_shape'])
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- if flag:
+ def __call__(self, x):
+ if training:
self.flag = model_pb2.kTrain
else:
self.flag = model_pb2.kEval
@@ -529,17 +535,19 @@ class MaxPool2d(Operation):
return self.PyLayer.layer.Backward(0, dy)[0]
-def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
- return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
+def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1,
+ return_indices=False, ceil_mode=False, **kwargs):
+ return MaxPool2d(kernel_size, stride, padding, dilation, return_indices,
+ ceil_mode, **kwargs)(x)[0]
class Flatten(Operation):
- def __init__(self, name='Flatten', axis=1, input_sample_shape=None):
- self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- if flag:
+ def __init__(self):
+ self.PyLayer = layer.Flatten('flatten', 1)
+
+ def __call__(self, x):
+ if training:
self.flag = model_pb2.kTrain
else:
self.flag = model_pb2.kEval
@@ -554,8 +562,8 @@ class Flatten(Operation):
return self.PyLayer.layer.Backward(0, dy)[0]
-def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
- return Flatten(name, axis, input_sample_shape)(x)[0]
+def flatten(x):
+ return Flatten()(x)[0]
def infer_dependency(op):