Posted to commits@singa.apache.org by zh...@apache.org on 2018/07/16 03:13:28 UTC
[2/4] incubator-singa git commit: SINGA-385 Add new python module for optimizers
SINGA-385 Add new python module for optimizers
Add the base optimizer and SGD (with momentum).
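For orientation, a minimal sketch of how the new module is meant to be driven, pieced together from the opt.SGD API added in python/singa/opt.py below and the parameter/gradient pairs that autograd.backwards yields in the ResNet example; the model, batch iterator and hyper-parameter values are placeholders, not code from this commit:

    from singa import autograd
    from singa import opt

    sgd = opt.SGD(lr=0.05, momentum=0.9, weight_decay=1e-4)

    autograd.training = True
    for x, y in batches:                # placeholder mini-batch iterator of Tensors
        out = model(x)                  # placeholder model, e.g. resnet18()
        loss = autograd.softmax_cross_entropy(out, y)
        for p, g in autograd.backwards(loss):
            sgd.update(p, g)            # update each parameter in place
        sgd.step()                      # advance the optimizer's step counter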
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/117dfcfd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/117dfcfd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/117dfcfd
Branch: refs/heads/master
Commit: 117dfcfd052bb92142a30b59fc173a2ef6480332
Parents: 2b5c3f7
Author: Wang Wei <wa...@gmail.com>
Authored: Sat Jul 14 13:07:52 2018 +0800
Committer: Wang Wei <wa...@gmail.com>
Committed: Mon Jul 16 10:04:54 2018 +0800
----------------------------------------------------------------------
examples/autograd/resnet.py | 117 ++++++++++++++++++++++++++++--
python/singa/autograd.py | 13 ++++
python/singa/opt.py | 152 +++++++++++++++++++++++++++++++++++++++
python/singa/tensor.py | 12 ++++
4 files changed, 287 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/117dfcfd/examples/autograd/resnet.py
----------------------------------------------------------------------
diff --git a/examples/autograd/resnet.py b/examples/autograd/resnet.py
index 930d9e0..f1fb9d6 100644
--- a/examples/autograd/resnet.py
+++ b/examples/autograd/resnet.py
@@ -23,6 +23,10 @@
from singa import autograd
from singa import tensor
from singa import device
+from singa import utils
+from singa import optimizer
+
+import pickle  # needed by load_dataset below
+import numpy as np
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
@@ -60,7 +64,7 @@ class BasicBlock(autograd.Layer):
if self.downsample is not None:
residual = self.downsample(x)
- out += residual
+ out = autograd.add(out, residual)
out = autograd.relu(out)
return out
@@ -101,7 +105,7 @@ class Bottleneck(autograd.Layer):
if self.downsample is not None:
residual = self.downsample(x)
- out += residual
+ out = autograd.add(out, residual)
out = autograd.relu(out)
return out
@@ -217,10 +221,109 @@ def resnet152(pretrained=False, **kwargs):
return model
-if __name__ == '__main__':
+def load_dataset(filepath):
+ print('Loading data file %s' % filepath)
+ with open(filepath, 'rb') as fd:
+ try:
+ cifar10 = pickle.load(fd, encoding='latin1')
+ except TypeError:
+ cifar10 = pickle.load(fd)
+ image = cifar10['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar10['labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path, num_batches=5):
+ labels = []
+ batchsize = 10000
+ images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
+ for did in range(1, num_batches + 1):
+ fname_train_data = dir_path + "/data_batch_{}".format(did)
+ image, label = load_dataset(fname_train_data)
+ images[(did - 1) * batchsize:did * batchsize] = image
+ labels.extend(label)
+ images = np.array(images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return images, labels
+
+
+def load_test_data(dir_path):
+ images, labels = load_dataset(dir_path + "/test_batch")
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def accuracy(pred, target):
+    # target holds class indices (shape (batch, 1)), not one-hot vectors,
+    # so flatten it instead of taking an argmax over a single column
+    y = np.argmax(pred, axis=1)
+    t = target.reshape(-1)
+    a = y == t
+    return np.array(a, 'int').sum() / float(len(t))
+
+
+def train(data, net, max_epoch, get_lr, weight_decay=1e-5, batch_size=100):
+    print('Start initialization............')
+ dev = device.create_cuda_gpu()
+
+ opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
+
+ tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
+ ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+ train_x, train_y, test_x, test_y = data
+ num_train_batch = train_x.shape[0] // batch_size
+ num_test_batch = test_x.shape[0] // batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+ for epoch in range(max_epoch):
+ np.random.shuffle(idx)
+ loss, acc = 0.0, 0.0
+ print('Epoch %d' % epoch)
+ autograd.training = True
+ for b in range(num_train_batch):
+ x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
+ y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ x = net(tx)
+ loss = autograd.softmax_cross_entropy(x, ty)
+ np_loss = tensor.to_numpy(loss)
+ acc += accuracy(tensor.to_numpy(x), y)
+
+ for p, g in autograd.backwards(loss):
+ opt.apply_with_lr(epoch, get_lr(epoch), g, p)
+ # update progress bar
+ utils.update_progress(b * 1.0 / num_train_batch,
+ 'training loss = %f' % (np_loss[0]))
+
+        loss, acc = 0.0, 0.0
+        autograd.training = False  # disable training mode for the test pass
+        for b in range(num_test_batch):
+            x = test_x[b * batch_size: (b + 1) * batch_size]
+            y = test_y[b * batch_size: (b + 1) * batch_size]
+            tx.copy_from_numpy(x)
+            ty.copy_from_numpy(y)
+            x = net(tx)
+            l = autograd.softmax_cross_entropy(x, ty)
+            loss += tensor.to_numpy(l)[0]
+            acc += accuracy(tensor.to_numpy(x), y)
+
+        print('test loss = %f, test accuracy = %f' %
+              ((loss / num_test_batch), (acc / num_test_batch)))
+
+
+def resnet_lr(epoch):
+ if epoch < 81:
+ return 0.1
+ elif epoch < 122:
+ return 0.01
+ else:
+ return 0.001
+
+if __name__ == '__main__':
model = resnet18()
- x = tensor.Tensor((16, 3, 224, 224), device.create_cuda_gpu())
- x.set_value(float(0.1))
- autograd.training = True
- y = model(x)
+ train_x, train_y = load_train_data()
+ test_x, test_y = load_test_data()
+ mean = np.average(train_x, axis=0)
+ train_x -= mean
+ test_x -= mean
+    train((train_x, train_y, test_x, test_y), model, 10, resnet_lr)
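Note that load_train_data and load_test_data take the directory holding the extracted CIFAR-10 python batches, which the __main__ block above does not pass. A sketch of the intended invocation, assuming 'cifar-10-batches-py' (the directory name used by the standard CIFAR-10 python archive) as a hypothetical data path:

    data_dir = 'cifar-10-batches-py'    # hypothetical path to the extracted archive
    train_x, train_y = load_train_data(data_dir)
    test_x, test_y = load_test_data(data_dir)
    mean = np.average(train_x, axis=0)  # per-pixel mean of the training set
    train_x -= mean
    test_x -= mean
    train((train_x, train_y, test_x, test_y), resnet18(), max_epoch=10,
          get_lr=resnet_lr)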
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/117dfcfd/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index c77c174..63e3771 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -347,6 +347,19 @@ def add_bias(x, b, axis=0):
return AddBias(axis)(x, b)[0]
+class Add(Operation):
+
+    def forward(self, a, b):
+        return a + b
+
+    def backward(self, dy):
+        return dy, dy
+
+
+def add(a, b):
+    return Add()(a, b)[0]
+
+
class SoftMax(Operation):
'''
Apply SoftMax for each row of the Tensor or each column of the Tensor
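The Add operation above passes the upstream gradient through unchanged to both operands, which is what lets the backward pass reach both branches of the residual connections that the ResNet example now routes through autograd.add. A quick numpy finite-difference check of that rule, included as an illustration only (not part of the commit):

    import numpy as np

    np.random.seed(0)
    a, b = np.random.randn(3, 4), np.random.randn(3, 4)
    dy = np.random.randn(3, 4)                # upstream gradient dL/dc for c = a + b

    def loss(a, b):
        return np.sum((a + b) * dy)           # linear in c, so dL/dc == dy

    eps, da = 1e-6, np.zeros((3, 4))
    for i in range(a.size):
        pert = np.zeros_like(a)
        pert.flat[i] = eps
        da.flat[i] = (loss(a + pert, b) - loss(a, b)) / eps

    assert np.allclose(da, dy, atol=1e-4)     # matches Add.backward returning (dy, dy)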
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/117dfcfd/python/singa/opt.py
----------------------------------------------------------------------
diff --git a/python/singa/opt.py b/python/singa/opt.py
new file mode 100644
index 0000000..bf04b09
--- /dev/null
+++ b/python/singa/opt.py
@@ -0,0 +1,152 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''This module includes a set of optimizers for updating model parameters.
+It replaces the old optimizers from optimizer.py'''
+
+from singa import tensor
+
+
+class Optimizer(object):
+    r"""Base optimizer.
+
+    Args:
+        config (Dict): specifies the default values of configurable variables.
+    """
+
+    def __init__(self, config):
+        self.config = config
+        self.step_counter = 0
+        self.param2config = {}
+        # per-parameter state (e.g. momentum buffers), keyed by parameter
+        self.param2state = {}
+
+    def update(self, param, grad):
+        r"""Update the param values with the given gradients.
+
+        Args:
+            param (Tensor): param values to be updated in place
+            grad (Tensor): param gradients; the values may be modified
+                inside this function and should not be used afterwards
+        """
+        pass
+
+    def step(self):
+        r"""Increment the step counter."""
+        self.step_counter += 1
+
+    def register(self, param_group, config):
+        for param in param_group:
+            assert param not in self.param2config, 'param is already registered'
+            self.param2config[param] = config
+
+    def load(self):
+        pass
+
+    def save(self):
+        pass
+
+
+class SGD(Optimizer):
+ r"""Implements stochastic gradient descent (optionally with momentum).
+
+ Nesterov momentum is based on the formula from
+ `On the importance of initialization and momentum in deep learning`__.
+
+    Args:
+        lr (float): learning rate
+        momentum (float, optional): momentum factor (default: 0)
+        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+        dampening (float, optional): dampening for momentum (default: 0)
+        nesterov (bool, optional): enables Nesterov momentum (default: False)
+
+    Example:
+        >>> from singa import opt
+        >>> optimizer = opt.SGD(lr=0.1, momentum=0.9)
+        >>> optimizer.update(param, grad)
+
+    __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
+
+    .. note::
+        The implementation of SGD with momentum/Nesterov subtly differs from
+        Sutskever et al. and implementations in some other frameworks.
+
+        Considering the specific case of momentum, the update can be written as
+
+        .. math::
+            v = \rho * v + g \\
+            p = p - lr * v
+
+        where p, g, v and :math:`\rho` denote the parameters, gradient,
+        velocity, and momentum respectively.
+
+        This is in contrast to Sutskever et al. and
+        other frameworks which employ an update of the form
+
+        .. math::
+            v = \rho * v + lr * g \\
+            p = p - v
+
+        The Nesterov version is analogously modified.
+    """
+
+    def __init__(self, lr=0.1, momentum=0, dampening=0,
+                 weight_decay=0, nesterov=False):
+        if momentum < 0.0:
+            raise ValueError("Invalid momentum value: {}".format(momentum))
+        if weight_decay < 0.0:
+            raise ValueError(
+                "Invalid weight_decay value: {}".format(weight_decay))
+
+        defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
+                        weight_decay=weight_decay, nesterov=nesterov)
+        if nesterov and (momentum <= 0 or dampening != 0):
+            raise ValueError(
+                "Nesterov momentum requires a momentum and zero dampening")
+        super(SGD, self).__init__(defaults)
+
+    def update(self, param, grad):
+        """Performs a single optimization step.
+
+        Args:
+            param (Tensor): param values to be updated in place
+            grad (Tensor): param gradients; the values may be modified
+                inside this function and should not be used afterwards
+        """
+        # fall back to the default config for parameters that were never
+        # explicitly registered
+        group = self.param2config.get(param, self.config)
+        weight_decay = group['weight_decay']
+        momentum = group['momentum']
+        dampening = group['dampening']
+        nesterov = group['nesterov']
+
+        if weight_decay != 0:
+            grad += param * weight_decay
+        if momentum != 0:
+            param_state = self.param2state.setdefault(param, {})
+            if 'momentum_buffer' not in param_state:
+                # first step: the buffer starts as a copy of the gradient
+                buf = param_state['momentum_buffer'] = tensor.zeros_like(param)
+                buf += grad
+            else:
+                buf = param_state['momentum_buffer']
+                buf *= momentum
+                buf += (1 - dampening) * grad
+            if nesterov:
+                grad += momentum * buf
+            else:
+                grad = buf
+        param -= grad * group['lr']
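For readers comparing the tensor arithmetic in SGD.update with the docstring formulas, here is a plain-numpy sketch of a single update step that mirrors the same weight-decay, momentum/dampening and Nesterov handling; the function name and the toy quadratic below are illustrative, not part of the commit:

    import numpy as np

    def sgd_step(param, grad, buf, lr, momentum=0.0, dampening=0.0,
                 weight_decay=0.0, nesterov=False):
        # buf is the momentum buffer v (None before the first step);
        # returns the updated parameters and the new buffer
        if weight_decay != 0:
            grad = grad + weight_decay * param
        if momentum != 0:
            if buf is None:
                buf = grad.copy()                          # first step: v = g
            else:
                buf = momentum * buf + (1 - dampening) * grad
            grad = grad + momentum * buf if nesterov else buf
        return param - lr * grad, buf

    # three steps on the quadratic sum(p ** 2), whose gradient is 2 * p
    p, v = np.array([1.0, -2.0]), None
    for _ in range(3):
        p, v = sgd_step(p, 2 * p, v, lr=0.1, momentum=0.9)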
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/117dfcfd/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 46a47b7..441431f 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -602,6 +602,18 @@ def from_raw_tensors(tt):
return ret
+def zeros_like(t):
+    ret = Tensor(t.shape, t.device, t.dtype)
+    ret.set_value(float(0))
+    return ret
+
+
+def ones_like(t):
+    ret = Tensor(t.shape, t.device, t.dtype)
+    ret.set_value(float(1))
+    return ret
+
+
def product(shape):
return reduce(lambda x, y: x * y, shape)