Posted to commits@singa.apache.org by mo...@apache.org on 2018/05/18 04:52:18 UTC
[12/14] incubator-singa git commit: SINGA-349 Create layer operations for autograd
SINGA-349 Create layer operations for autograd
clean the code and add comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d7d629b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d7d629b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d7d629b
Branch: refs/heads/master
Commit: 6d7d629bf00437c23a67f1de3b1ed085764e9492
Parents: 403843d
Author: Wang Wei <dc...@nus.edu.sg>
Authored: Thu May 17 21:18:31 2018 +0800
Committer: Wang Wei <dc...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800
----------------------------------------------------------------------
examples/autograd/mnist_cnn.py | 41 ++++----
python/singa/autograd.py | 200 +++++++++++++++++++-----------------
2 files changed, 126 insertions(+), 115 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d7d629b/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 7afbb9e..3ab8b62 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -31,18 +31,18 @@ def to_categorical(y, num_classes):
n = y.shape[0]
categorical = np.zeros((n, num_classes))
categorical[np.arange(n), y] = 1
- categorical=categorical.astype(np.float32)
+ categorical = categorical.astype(np.float32)
return categorical
def preprocess(data):
- data=data.astype(np.float32)
+ data = data.astype(np.float32)
data /= 255
- data=np.expand_dims(data, axis=1)
+ data = np.expand_dims(data, axis=1)
return data
-def accuracy(pred,target):
+def accuracy(pred, target):
y = np.argmax(pred, axis=1)
t = np.argmax(target, axis=1)
a = y == t
@@ -55,8 +55,9 @@ if __name__ == '__main__':
parser.add_argument('file_path', type=str, help='the dataset path')
args = parser.parse_args()
- assert os.path.exists(args.file_path), 'Pls download the MNIST dataset from' \
- 'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
+ assert os.path.exists(args.file_path), \
+ 'Pls download the MNIST dataset from ' \
+ 'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
train, test = load_data(args.file_path)
@@ -69,18 +70,17 @@ if __name__ == '__main__':
x_train = preprocess(train[0])
y_train = to_categorical(train[1], num_classes)
- x_test=preprocess(test[0])
- y_test=to_categorical(test[1],num_classes)
- print ('the shape of training data is', x_train.shape)
- print ('the shape of training label is', y_train.shape)
- print ('the shape of testing data is', x_test.shape)
- print ('the shape of testing label is', y_test.shape)
+ x_test = preprocess(test[0])
+ y_test = to_categorical(test[1], num_classes)
+ print('the shape of training data is', x_train.shape)
+ print('the shape of training label is', y_train.shape)
+ print('the shape of testing data is', x_test.shape)
+ print('the shape of testing label is', y_test.shape)
# operations initialization
conv1 = autograd.Conv2d(3, 32)
conv2 = autograd.Conv2d(32, 32)
- linear = autograd.Linear(32*28*28, 10)
-
+ linear = autograd.Linear(32 * 28 * 28, 10)
def forward(x, t):
y = conv1(x)
@@ -94,18 +94,21 @@ if __name__ == '__main__':
loss = autograd.cross_entropy(y, t)
return loss, y
+ autograd.training = True
for epoch in range(epochs):
for i in range(batch_number):
- inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
- targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+ inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :])
+ targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :])
loss, y = forward(inputs, targets)
- accuracy_rate = accuracy(autograd.ctensor2numpy(y.data),autograd.ctensor2numpy(targets.data))
+ accuracy_rate = accuracy(autograd.ctensor2numpy(
+ y.data), autograd.ctensor2numpy(targets.data))
if (i % 5 == 0):
- print('accuracy is:', accuracy_rate,'loss is:', autograd.ctensor2numpy(loss.data)[0])
+ print('accuracy is:', accuracy_rate, 'loss is:',
+ autograd.ctensor2numpy(loss.data)[0])
in_grads = autograd.backward(loss)
for param in in_grads:
- sgd.apply(0, in_grads[param], param, '')
\ No newline at end of file
+ sgd.apply(0, in_grads[param], param, '')
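The main behavioral change in this example is the module-level switch: instead of threading a boolean flag through every operation call, the loop sets autograd.training = True once before training. A minimal evaluation sketch under that convention (hypothetical: this commit adds no eval loop; the names reuse the helpers defined in the example above):

    autograd.training = False  # operations stop caching inputs for backward
    test_inputs = tensor.Tensor(data=x_test[:100, :])
    test_targets = tensor.Tensor(data=y_test[:100, :])
    loss, y = forward(test_inputs, test_targets)
    print('test accuracy:', accuracy(autograd.ctensor2numpy(y.data),
                                     autograd.ctensor2numpy(test_targets.data)))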
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d7d629b/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index de3dc92..fc438ab 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -1,18 +1,18 @@
from __future__ import division
-from functools import reduce
from collections import Counter, deque
-from .tensor import Tensor
+import numpy as np
+import math
-from singa import layer
+from .tensor import Tensor
+from . import layer
from singa.proto import model_pb2
from . import singa_wrap as singa
-import numpy as np
-import math
CTensor = singa.Tensor
+training = False
class Operation(object):
@@ -20,12 +20,16 @@ class Operation(object):
An operation includes the forward and backward function of
tensor calculation.
- To add a specific operation Xxxx, subclass Operation and implement
- forward() and backward(). Then implement a function xxxx which creates
- a Xxxx instance and calls __call__ to do forward. The autograd engine
- is able to do backward propagation by calling the backward() of Xxxx
- automatically. Notice that the tensors are CTensor. NOT Python Tensor.
- The arguments of forward() and backward() should only include CTensor args;
+ Steps to add a specific operation Xxxx:
+ 1. create a subclass of Operation and name it Xxxx
+ 2. if Xxxx is implemented using other Operations, then override
+ the _do_forward() function;
+ if Xxxx is implemented using CTensor operations,
+ then override forward() and backward(); the arguments of forward()
+ and backward() should only include CTensor;
+ if Xxxx is implemented by calling functions in layer.py, then override
+ __call__(), forward() and backward(). TODO(wangwei) avoid this complex
+ case.
'''
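To make case 2 concrete, here is a minimal sketch of a hypothetical Square operation (y = x * x, element-wise); it is not part of this commit, and it assumes singa_wrap exposes an element-wise singa.__mul__, by analogy with the singa.__div__ call used in CrossEntropy below:

    class Square(Operation):
        '''Hypothetical example: y = x * x, element-wise.'''

        def forward(self, x):
            if training:
                self.input = x  # cache input for backward, as ReLU does
            return singa.__mul__(x, x)  # assumed binding, see note above

        def backward(self, dy):
            # dL/dx = 2 * x * dy, using the input cached during forward
            dx = singa.__mul__(dy, self.input)
            dx *= 2.0  # CTensor supports in-place scaling, cf. CrossEntropy
            return dx

    def square(x):
        return Square()(x)[0]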
def __call__(self, *xs):
@@ -103,6 +107,9 @@ class Operation(object):
'''
raise NotImplementedError
+ def get_params(self):
+ return []
+
class Dummy(Operation):
'''Dummy operation which serves as a placeholder for autograd
@@ -119,10 +126,6 @@ class Dummy(Operation):
class ReLU(Operation):
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- self.flag=flag
- return self._do_forward(x)
def forward(self, x):
'''
@@ -132,7 +135,7 @@ class ReLU(Operation):
Returns:
a new CTensor whose element y = x if x >= 0; otherwise 0;
'''
- if self.flag:
+ if training:
self.input = x
return singa.ReLU(x)
@@ -154,10 +157,6 @@ def relu(x):
class Matmul(Operation):
'''For matrix multiplication'''
- def __call__(self, x, w, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- self.flag=flag
- return self._do_forward(x, w)
def forward(self, x, w):
'''Do forward propagation.
@@ -171,7 +170,7 @@ class Matmul(Operation):
Returns:
a CTensor for the result
'''
- if self.flag:
+ if training:
self.input = (x, w)
return singa.Mult(x, w)
@@ -187,13 +186,13 @@ class Matmul(Operation):
singa.Mult(self.input[0].T(), dy)
-def matmul(x, w, flag=True):
- return Matmul()(x, w, flag)[0]
+def matmul(x, w):
+ return Matmul()(x, w)[0]
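The two products returned by Matmul.backward are the standard matrix-calculus identities; writing the forward pass as y = xw with upstream gradient dy = \partial L / \partial y:

    \frac{\partial L}{\partial x} = dy \, w^{\top}, \qquad
    \frac{\partial L}{\partial w} = x^{\top} dy

the second of which is the singa.Mult(self.input[0].T(), dy) visible in the hunk above, applied to the inputs cached during forward.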
class AddBias(Operation):
'''
- Add Bias to each row / column of the Tensor, depending on the parameter axis.
+ Add Bias to each row / column of the Tensor, depending on the axis arg.
'''
def __init__(self, axis=0):
@@ -303,7 +302,8 @@ class CrossEntropy(Operation):
def forward(self, x, t):
'''
Args:
- x (CTensor): 1d or 2d tensor, the prediction data(output) of current network.
+ x (CTensor): 1d or 2d tensor, the prediction data (output)
+ of current network.
t (CTensor): 1d or 2d tensor, the target data for training.
Returns:
@@ -320,12 +320,13 @@ class CrossEntropy(Operation):
def backward(self, dy=1.0):
'''
Args:
- dy (float or CTensor): scalar, accumulate gradient from outside of current network, usually
- equal to 1.0
+ dy (float or CTensor): scalar, accumulated gradient from outside
+ of current network, usually equal to 1.0
Returns:
- dx (CTensor): data for the dL /dx, L is the loss, x is the output of current network.
- note that this is true for dy = 1.0
+ dx (CTensor): data for dL/dx, where L is the loss and x is the
+ output of current network. Note that this is true
+ for dy = 1.0
'''
dx = singa.__div__(self.t, self.x)
dx *= float(-1 / self.x.shape()[0])
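For reference, this backward implements the textbook gradient of batch-averaged cross entropy; with batch size n, prediction x and target t:

    L = -\frac{1}{n} \sum_i t_i \log x_i
    \quad\Rightarrow\quad
    \frac{\partial L}{\partial x_i} = -\frac{t_i}{n \, x_i}

hence the element-wise division singa.__div__(self.t, self.x) scaled by -1/n, valid for dy = 1.0 as the docstring notes.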
@@ -351,17 +352,18 @@ def ctensor2numpy(x):
class Conv2d(Operation):
- def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, dilation=1, groups=1, bias=True,
- **kwargs):
+
+ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
+ padding=0, dilation=1, groups=1, bias=True, **kwargs):
inner_params = {'name': 'Conv2d',
- 'border_mode': 'same',
- 'cudnn_prefer': 'fastest',
- 'workspace_byte_limit': 1024,
- 'data_format': 'NCHW',
- 'W_specs': {'init': 'xavier'},
- 'b_specs': {'init': 'constant'},
- 'input_sample_shape': None}
+ 'border_mode': 'same',
+ 'cudnn_prefer': 'fastest',
+ 'workspace_byte_limit': 1024,
+ 'data_format': 'NCHW',
+ 'W_specs': {'init': 'xavier'},
+ 'b_specs': {'init': 'constant'},
+ 'input_sample_shape': None}
# TODO valid value of inner_params check
for kwarg in kwargs:
@@ -369,7 +371,7 @@ class Conv2d(Operation):
raise TypeError('Keyword argument not understood:', kwarg)
else:
inner_params[kwarg] = kwargs[kwarg]
-
+
self.in_channels = in_channels
self.out_channels = out_channels
self.W_specs = inner_params['W_specs']
@@ -388,21 +390,30 @@ class Conv2d(Operation):
if dilation != 1 or groups != 1:
raise ValueError('Not implemented yet')
- self.PyLayer = layer.Conv2D(inner_params['name'], nb_kernels=out_channels, kernel=kernel_size, stride=stride,
+ self.PyLayer = layer.Conv2D(inner_params['name'],
+ nb_kernels=out_channels,
+ kernel=kernel_size,
+ stride=stride,
border_mode=inner_params['border_mode'],
- cudnn_prefer=inner_params['cudnn_prefer'], workspace_byte_limit=inner_params['workspace_byte_limit'],
- data_format=inner_params['data_format'], use_bias=bias, W_specs=self.W_specs, b_specs=self.b_specs,
- pad=pad, input_sample_shape=inner_params['input_sample_shape'])
+ cudnn_prefer=inner_params['cudnn_prefer'],
+ workspace_byte_limit=inner_params[
+ 'workspace_byte_limit'],
+ data_format=inner_params['data_format'],
+ use_bias=bias,
+ W_specs=self.W_specs,
+ b_specs=self.b_specs,
+ pad=pad,
+ input_sample_shape=inner_params['input_sample_shape'])
def get_params(self):
- assert self.has_setup, \
- 'Must call setup() before get_params()'
- params = self.PyLayer.layer.param_values()
- return params
-
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- if flag:
+ assert self.init_value is True, 'must initialize before get_params()'
+ if self.bias:
+ return (self.w, self.b)
+ else:
+ return self.w
+
+ def __call__(self, x):
+ if training:
self.flag = model_pb2.kTrain
else:
self.flag = model_pb2.kEval
@@ -413,15 +424,18 @@ class Conv2d(Operation):
param_data = self.PyLayer.layer.param_values()
if not hasattr(self, 'w'):
- self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
- std = math.sqrt(2.0/(self.in_channels*self.kernel_size[0]*self.kernel_size[1]+self.out_channels))
+ self.w = Tensor(device=param_data[0].device, data=param_data[
+ 0], requires_grad=True, stores_grad=True)
+ std = math.sqrt(
+ 2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] + self.out_channels))
self.w.gaussian(0.0, std)
xs = [x, self.w]
if len(param_data) == 2:
if not hasattr(self, 'b'):
- self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
+ self.b = Tensor(device=param_data[1].device, data=param_data[
+ 1], requires_grad=True, stores_grad=True)
self.b.set_value(0.0)
xs.append(self.b)
@@ -434,10 +448,11 @@ class Conv2d(Operation):
def backward(self, dy):
ret = self.PyLayer.layer.Backward(self.flag, dy)
- return (ret[0],)+ret[1]
+ return (ret[0],) + ret[1]
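The lazily initialized weight above is drawn with a Glorot-style standard deviation, sqrt(2 / (fan_in + out_channels)) where fan_in = in_channels * kh * kw. A quick worked example against the mnist script, assuming conv1 = autograd.Conv2d(3, 32) with the default 3x3 kernel:

    std = math.sqrt(2.0 / (3 * 3 * 3 + 32))  # sqrt(2 / 59) ≈ 0.184
    # self.w is then sampled from N(0, std**2); self.b is zero-initialized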
class Linear(Operation):
+
def __init__(self, in_features, out_features, bias=True):
self.in_features = in_features
self.out_features = out_features
@@ -453,47 +468,36 @@ class Linear(Operation):
else:
return self.w
- def init_params(self, w, b=None):
- if self.bias:
- assert b is not None, 'must initialize bias.'
- assert w.shape == self.w_shape, 'shape of parameters must match.'
- assert b.shape == self.b_shape, 'shape of parameters must match.'
- self.w = w
- self.b = b
- else:
- assert b is None, 'cannot initialize bias.'
- assert w.shape == self.w_shape, 'shape of parameters must match.'
- self.w = w
- self.init_value = True
- return
-
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
+ def __call__(self, x):
if self.init_value is False:
- self.w = Tensor(shape=self.w_shape, requires_grad=True, stores_grad=True)
+ self.w = Tensor(shape=self.w_shape,
+ requires_grad=True, stores_grad=True)
std = math.sqrt(2.0 / (self.in_features + self.out_features))
self.w.gaussian(0.0, std)
if self.bias:
- self.b = Tensor(shape=self.b_shape, requires_grad=True, stores_grad=True)
+ self.b = Tensor(shape=self.b_shape,
+ requires_grad=True, stores_grad=True)
self.b.set_value(0.0)
self.init_value = True
- y = matmul(x, self.w, flag)
+ y = matmul(x, self.w)
if self.bias:
y = add_bias(y, self.b, axis=0)
return y
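Since Linear now creates its parameters on first call, construction needs no device or input data up front. A minimal usage sketch (shapes chosen to match the mnist example; the dummy batch is illustrative only):

    from singa import tensor, autograd

    autograd.training = True
    linear = autograd.Linear(32 * 28 * 28, 10)  # w: (25088, 10), b: (10,)
    x = tensor.Tensor(shape=(4, 32 * 28 * 28))  # dummy batch of 4
    x.gaussian(0.0, 1.0)
    y = linear(x)  # first call creates w and b, then computes x w + b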
class MaxPool2d(Operation):
- def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+
+ def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1,
+ return_indices=False, ceil_mode=False, **kwargs):
inner_params = {'name': 'MaxPool2d',
- 'border_mode': 'same',
- 'data_format': 'NCHW',
- 'input_sample_shape': None
- }
+ 'border_mode': 'same',
+ 'data_format': 'NCHW',
+ 'input_sample_shape': None
+ }
for kwarg in kwargs:
- if kwarg not in allowed_kwargs:
+ if kwarg not in inner_params:
raise TypeError('Keyword argument not understood:', kwarg)
else:
inner_params[kwarg] = kwargs[kwarg]
@@ -503,16 +507,18 @@ class MaxPool2d(Operation):
else:
pad = padding
- if dilation != 1 or return_indices is not False or ceil_mode is not False:
+ if dilation != 1 or return_indices or ceil_mode:
raise ValueError('Not implemented yet')
- self.PyLayer = layer.Pooling2D(inner_params['name'], model_pb2.PoolingConf.MAX,
- kernel_size, stride, inner_params['border_mode'],
- pad, inner_params['data_format'], inner_params['input_sample_shape'])
+ self.PyLayer = layer.Pooling2D(inner_params['name'],
+ model_pb2.PoolingConf.MAX,
+ kernel_size, stride, inner_params[
+ 'border_mode'],
+ pad, inner_params['data_format'],
+ inner_params['input_sample_shape'])
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- if flag:
+ def __call__(self, x):
+ if training:
self.flag = model_pb2.kTrain
else:
self.flag = model_pb2.kEval
@@ -529,17 +535,19 @@ class MaxPool2d(Operation):
return self.PyLayer.layer.Backward(0, dy)[0]
-def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
- return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
+def max_pool_2d(x, kernel_size=3, stride=1, padding=0, dilation=1,
+ return_indices=False, ceil_mode=False, **kwargs):
+ return MaxPool2d(kernel_size, stride, padding, dilation, return_indices,
+ ceil_mode, **kwargs)(x)[0]
class Flatten(Operation):
- def __init__(self, name='Flatten', axis=1, input_sample_shape=None):
- self.PyLayer = layer.Flatten(name, axis, input_sample_shape)
- def __call__(self, x, flag=True):
- assert type(flag) is bool, 'flag can only be bool.'
- if flag:
+ def __init__(self):
+ self.PyLayer = layer.Flatten('flatten', 1)
+
+ def __call__(self, x):
+ if training:
self.flag = model_pb2.kTrain
else:
self.flag = model_pb2.kEval
@@ -554,8 +562,8 @@ class Flatten(Operation):
return self.PyLayer.layer.Backward(0, dy)[0]
-def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
- return Flatten(name, axis, input_sample_shape)(x)[0]
+def flatten(x):
+ return Flatten()(x)[0]
def infer_dependency(op):