You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/17 18:02:49 UTC
[28/51] [abbrv] incubator-singa git commit: Fixed the bug leading to
wired accuracy (nan),
which was caused by forgeting to average the gradient over the whole
mini-batch. That is why we need a lower learning rate and could not use
momentum. Update the l
Fixed the bug leading to wired accuracy (nan), which was caused by forgeting
to average the gradient over the whole mini-batch. That is why we need a lower
learning rate and could not use momentum.
Update the lr in optimzier.py to time the multiplier
Fix the bug from mis-setting the pooling type of alexnet.py (max-->avg)
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d4539ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d4539ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d4539ee
Branch: refs/heads/master
Commit: 6d4539eed2ae200a3a904a70cb789fc1b39d0f38
Parents: 1db2784
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 13:13:19 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Mon Aug 15 20:16:30 2016 +0800
----------------------------------------------------------------------
examples/cifar10/alexnet.cc | 11 +-
examples/cifar10/alexnet.py | 13 +-
examples/cifar10/train.py | 19 ++-
src/model/feed_forward_net.cc | 6 +-
src/model/optimizer/sgd.cc | 4 +-
src/python/singa/__init__.py | 240 -------------------------------------
src/python/singa/layer.py | 15 +--
src/python/singa/net.py | 8 +-
src/python/singa/optimizer.py | 36 ++++--
src/python/singa/tensor.py | 8 +-
10 files changed, 68 insertions(+), 292 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index e1363e4..8051d1b 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -134,7 +134,7 @@ FeedForwardNet CreateNet() {
return net;
}
-void Train(float lr, int num_epoch, string data_dir) {
+void Train(int num_epoch, string data_dir) {
Cifar10 data(data_dir);
Tensor train_x, train_y, test_x, test_y;
{
@@ -161,11 +161,11 @@ void Train(float lr, int num_epoch, string data_dir) {
auto net = CreateNet();
SGD sgd;
OptimizerConf opt_conf;
- opt_conf.set_momentum(0.9);
+ // opt_conf.set_momentum(0.9);
auto reg = opt_conf.mutable_regularizer();
reg->set_coefficient(0.004);
sgd.Setup(opt_conf);
- sgd.SetLearningRateGenerator([lr](int step) {
+ sgd.SetLearningRateGenerator([](int step) {
if (step <= 120)
return 0.001;
else if (step <= 130)
@@ -193,14 +193,11 @@ int main(int argc, char **argv) {
int pos = singa::ArgPos(argc, argv, "-epoch");
int nEpoch = 1;
if (pos != -1) nEpoch = atoi(argv[pos + 1]);
- pos = singa::ArgPos(argc, argv, "-lr");
- float lr = 0.001;
- if (pos != -1) lr = atof(argv[pos + 1]);
pos = singa::ArgPos(argc, argv, "-data");
string data = "cifar-10-batches-bin";
if (pos != -1) data = argv[pos + 1];
LOG(INFO) << "Start training";
- singa::Train(lr, nEpoch, data);
+ singa::Train(nEpoch, data);
LOG(INFO) << "End training";
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..dae129f 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,9 +20,6 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
validation accuracy would be about 82%.
'''
-import sys
-import os
-
# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
from singa import layer
from singa import initializer
@@ -39,18 +36,18 @@ def create_net(use_cpu=False):
W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}
W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
- b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
+ b_specs = {'init': 'constant', 'value': 0, 'lr_mult': 2, 'decay_mult': 0}
net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
net.add(layer.Activation('relu1'))
- net.add(layer.LRN(name='lrn1'))
+ net.add(layer.LRN(name='lrn1', size=3, alpha=5e-5))
net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
net.add(layer.Activation('relu2'))
- net.add(layer.MaxPooling2D('pool2', 3, 2, pad=1))
- net.add(layer.LRN('lrn2'))
+ net.add(layer.AvgPooling2D('pool2', 3, 2, pad=1))
+ net.add(layer.LRN('lrn2', size=3, alpha=5e-5))
net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
net.add(layer.Activation('relu3'))
- net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
+ net.add(layer.AvgPooling2D('pool3', 3, 2, pad=1))
net.add(layer.Flatten('flat'))
net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
for (p, specs) in zip(net.param_values(), net.param_specs()):
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index de03750..2091ee5 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -22,7 +22,6 @@ includes 1 label & 3072 pixels. 3072 pixels are 3 channels of a 32x32 image
import cPickle
import numpy as np
import os
-import sys
import argparse
# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
@@ -84,7 +83,7 @@ def normalize_for_alexnet(train_x, test_x):
def vgg_lr(epoch):
- return 0.01 / float(1 << ((epoch / 30)))
+ return 0.1 / float(1 << ((epoch / 25)))
def alexnet_lr(epoch):
@@ -92,7 +91,7 @@ def alexnet_lr(epoch):
return 0.001
elif epoch < 130:
return 0.0001
- elif epoch < 140:
+ else:
return 0.00001
@@ -107,8 +106,8 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
dev = device.create_cuda_gpu()
net.to_device(dev)
- opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
- for (p, specs) in zip(net.param_values(), net.param_specs()):
+ opt = optimizer.SGD(momentum=0.9, decay=weight_decay)
+ for (p, specs) in zip(net.param_names(), net.param_specs()):
opt.register(p, specs)
tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
@@ -129,13 +128,13 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
grads, (l, a) = net.train(tx, ty)
loss += l
acc += a
- for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
- opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
+ for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
+ opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s))
# update progress bar
utils.update_progress(b * 1.0 / num_train_batch,
'training loss = %f, accuracy = %f' % (l, a))
- info = '\ntraining loss = %f, training accuracy = %f' \
- % (loss / num_train_batch, acc / num_train_batch)
+ info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
+ % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
print info
loss, acc = 0.0, 0.0
@@ -167,7 +166,7 @@ if __name__ == '__main__':
if args.model == 'alexnet':
train_x, test_x = normalize_for_alexnet(train_x, test_x)
net = alexnet.create_net(args.use_cpu)
- train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004,
+ train((train_x, train_y, test_x, test_y), net, 160, alexnet_lr, 0.004,
use_cpu=args.use_cpu)
else:
train_x, test_x = normalize_for_vgg(train_x, test_x)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/model/feed_forward_net.cc
----------------------------------------------------------------------
diff --git a/src/model/feed_forward_net.cc b/src/model/feed_forward_net.cc
index 514d6e2..3875430 100644
--- a/src/model/feed_forward_net.cc
+++ b/src/model/feed_forward_net.cc
@@ -206,8 +206,8 @@ const std::pair<float, float> FeedForwardNet::TrainOnBatch(int epoch,
const Tensor FeedForwardNet::Forward(int flag, const Tensor& data) {
Tensor input = data, output;
+ // LOG(INFO) << data.L1();
for (auto layer : layers_) {
- // LOG(INFO) << layer->name() << ": " << input.L1();
output = layer->Forward(flag, input);
// LOG(INFO) << layer->name() << ": " << output.L2();
input = output;
@@ -220,13 +220,13 @@ const vector<Tensor> FeedForwardNet::Backward(int flag, const Tensor& grad) {
std::stack<Tensor> buf;
Tensor tmp = grad;
for (int i = layers_.size() - 1; i >= 0; i--) {
- // LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
+ // LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
auto ret = layers_.at(i)->Backward(flag, tmp);
tmp = ret.first;
if (ret.second.size()) {
for (int k = ret.second.size() - 1; k >= 0; k--) {
buf.push(ret.second[k]);
- // LOG(INFO) << " " << buf.top().L1();
+ // LOG(INFO) << " " << buf.top().L1();
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/model/optimizer/sgd.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/sgd.cc b/src/model/optimizer/sgd.cc
index d78d5b8..ac453cd 100644
--- a/src/model/optimizer/sgd.cc
+++ b/src/model/optimizer/sgd.cc
@@ -33,6 +33,7 @@ void SGD::Setup(const OptimizerConf& conf) {
// value = value - history
void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
Tensor& value) {
+ // LOG(INFO) << "param " << name << " lr = " << lr << " grad = " << grad.L1() << " value = " << value.L1();
if (momentum_generator_) {
float mom = momentum_generator_(step);
if (mom != 0) {
@@ -46,9 +47,8 @@ void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
value -= history;
return;
}
- } else {
- Axpy(-lr, grad, &value);
}
+ Axpy(-lr, grad, &value);
}
} // namespace singa
#endif // SRC_MODEL_OPTIMIZER_SGD_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index f14c8c5..e69de29 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,240 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-This script is the main entrance for user to run singa inside a model workspace
-
-To use this script, user sudo install these dependencies: flask pillow and protobuf
-'''
-
-import sys, glob, os, random, shutil, time
-from flask import Flask, request, redirect, url_for
-import numpy as np
-import ConfigParser
-import urllib, traceback
-
-
-from argparse import ArgumentParser
-from argparse import RawDescriptionHelpFormatter
-sys.path.append(os.getcwd())
-
-__all__ = []
-__version__ = 0.1
-__date__ = '2016-07-20'
-__updated__ = '2016-07-20'
-__shortdesc__ = '''
-welcome to singa
-'''
-
-app = Flask(__name__)
-config = ConfigParser.RawConfigParser()
-service = {}
-data_path = "data_"
-parameter_path = "parameter_"
-
-debug = False
-
-class CLIError(Exception):
- '''Generic exception to raise and log different fatal errors.'''
- def __init__(self, msg):
- super(CLIError).__init__(type(self))
- self.msg = "E: %s" % msg
- def __str__(self):
- return self.msg
- def __unicode__(self):
- return self.msg
-
-def main(argv=None): # IGNORE:C0111
- '''Command line options.'''
-
- from . import device
-
- if argv is None:
- argv = sys.argv
- else:
- sys.argv.extend(argv)
-
- program_name = os.path.basename(sys.argv[0])
- program_version = "v%s" % __version__
- program_build_date = str(__updated__)
- program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
- program_shortdesc = __shortdesc__
- program_license = '''%s
-
- Created by dbsystem group on %s.
- Copyright 2016 NUS School of Computing. All rights reserved.
-
- Licensed under the Apache License 2.0
- http://www.apache.org/licenses/LICENSE-2.0
-
- Distributed on an "AS IS" basis without warranties
- or conditions of any kind, either express or implied.
-
-USAGE
-''' % (program_shortdesc, str(__date__))
-
- global debug
-
- try:
- # Setup argument parser
- parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
- parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
- parser.add_argument("-param", "--parameter", dest="parameter", help="the parameter file path to be loaded")
- parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
- parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
- parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
- parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
- parser.add_argument('-V', '--version', action='version', version=program_version_message)
-
- # Process arguments
- args = parser.parse_args()
-
- port = args.port
- parameter_file = args.parameter
- mode = args.mode
- need_reload = args.reload_data
- use_cpu = args.use_cpu
- debug = args.debug
-
- #prepare data files
- config.read('file.cfg')
- file_prepare(need_reload)
-
-
- import network as net
- model = net.create()
-
- #load parameter
- parameter_file=get_parameter(parameter_file)
-
- if parameter_file:
- print "load parameter file: %s" % parameter_file
- model.load(parameter_file)
-
- if use_cpu:
- raise CLIError("Currently cpu is not support!")
- else:
- print "runing with gpu"
- d = device.create_cuda_gpu()
-
- model.to_device(d)
-
- if mode == "serve":
- print "runing singa in serve mode, listen to port: %s " % port
- global service
- from serve import Service
- service =Service(model,d)
-
- app.debug = debug
- app.run(host='0.0.0.0', port= port)
- elif mode == "train":
- print "runing singa in train mode"
- global trainer
- from train import Trainer
- trainer= Trainer(model,d)
- if not parameter_file:
- trainer.initialize()
- trainer.train()
- else:
- raise CLIError("Currently only serve mode is surpported!")
- return 0
- except KeyboardInterrupt:
- ### handle keyboard interrupt ###
- return 0
- except Exception, e:
- if debug:
- traceback.print_exc()
- raise(e)
- indent = len(program_name) * " "
- sys.stderr.write(program_name + ": " + str(e) + "\n")
- sys.stderr.write(indent + " for help use --help \n\n")
- return 2
-
-def file_prepare(reload_data=False):
- '''
- download all files and generate data.py
- '''
- if not reload_data and os.path.exists("data_.py"):
- return
-
- print "download file"
- #clean data
- shutil.rmtree("data_.py",ignore_errors=True)
- shutil.rmtree("data_",ignore_errors=True)
-
- data_py=open("data_.py",'w')
- data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
- if config.has_section("data"):
- file_list = config.items("data")
- #download files
- for f in file_list:
- name,path=download_file(f[0],f[1],data_path)
- data_py.write("%s=\"%s\"\n" % (name,path))
-
- data_py.flush()
- data_py.close()
-
- if config.has_section("parameter"):
- parameter_list = config.items("parameter")
- for p in parameter_list:
- download_file(p[0],p[1],parameter_path)
-
-def download_file(name,path,dest):
- '''
- download one file to dest
- '''
- if not os.path.exists(dest):
- os.makedirs(dest)
- if (path.startswith('http')):
- file_name = path.split('/')[-1]
- target = os.path.join(dest,file_name)
- urllib.urlretrieve(path,target)
- return name,target
-
-
-def get_parameter(file_name=None):
- '''
- get the paticular file name or get the last parameter file
- '''
- if not os.path.exists(parameter_path):
- os.makedirs(parameter_path)
- return
-
- if file_name:
- return os.path.join(parameter_path,file_name)
-
- parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
- if len(parameter_list)==0:
- return
- parameter_list.sort()
-
- return parameter_list[-1]
-
-@app.route("/")
-def index():
- return "Hello SINGA User!"
-
-@app.route('/predict', methods=['POST'])
-def predict():
- if request.method == 'POST':
- try:
- response=service.serve(request)
- except Exception as e:
- return e
- return response
- return "error, should be post request"
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..1e9caeb 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -362,8 +362,8 @@ class BatchNormalization(Layer):
class LRN(Layer):
- def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
- k=1, input_sample_shape=None):
+ def __init__(self, name, size=5, alpha=1e-4, beta=0.75,
+ mode='cross_channel', k=1, input_sample_shape=None):
"""Local response normalization.
Args:
@@ -391,7 +391,7 @@ class Dense(Layer):
def __init__(self, name, num_output, use_bias=True,
W_specs=None, b_specs=None,
- W_transpose=True, input_sample_shape=None):
+ W_transpose=False, input_sample_shape=None):
"""Apply linear/affine transformation, also called inner-product or
fully connected layer.
@@ -424,10 +424,10 @@ class Dense(Layer):
W_specs['name'] = name + '_weight'
if 'name' not in b_specs:
b_specs['name'] = name + '_bias'
- self.conf.param.extend([_construct_param_specs_from_dict(W_specs)])
- self.param_specs.append(_construct_param_specs_from_dict(W_specs))
- self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
- self.param_specs.append(_construct_param_specs_from_dict(b_specs))
+ wspecs = _construct_param_specs_from_dict(W_specs)
+ bspecs = _construct_param_specs_from_dict(b_specs)
+ self.conf.param.extend([wspecs, bspecs])
+ self.param_specs.extend([wspecs, bspecs])
# dense layer is transparent to engine.
self.layer = _create_layer('singa', 'Dense')
if input_sample_shape is not None:
@@ -712,6 +712,7 @@ def _construct_param_specs_from_dict(specs):
a ParamSpec object
"""
conf = model_pb2.ParamSpec()
+ print 'convert', specs
if 'name' in specs:
conf.name = specs['name']
if 'lr_mult' in specs:
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index f040378..3a1732c 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -95,16 +95,22 @@ class FeedForwardNet(object):
# print x.l1()
for lyr in self.layers:
x = lyr.forward(flag, x)
- # print lyr.name, x.l1()
+ # print lyr.name, x.l1()
return x
def backward(self):
grad = self.loss.backward()
+ if len(grad.shape) > 1:
+ grad /= grad.shape[0] # average across the batch
+ # print 'grad', grad.l1()
pgrads = []
for lyr in reversed(self.layers):
grad, _pgrads = lyr.backward(kTrain, grad)
+ # disp = '%f ' % grad.l1()
for g in reversed(_pgrads):
pgrads.append(g)
+ # disp = disp + ', %f ' % g.l1()
+ # print disp
return reversed(pgrads)
def save(self, f):
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index aa6bdd1..32f03d4 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -102,16 +102,19 @@ class Optimizer(object):
name (str): parameter name
specs (ParamSpec): protobuf obj
"""
- assert type(specs) == model_pb2.ParamSpec, \
- 'specs should be model_pb2.ParamSpec instance'
+ assert type(specs) == model_pb2.ParamSpec, \
+ 'specs should be model_pb2.ParamSpec instance'
if specs.HasField('regularizer'):
self.regularizers[name] = CppRegularizer(specs.regularizer)
+ elif specs.decay_mult != 1:
+ self.regularizers[name] = L2Regularizer(
+ specs.decay_mult * self.regularizer.coefficient)
+
if specs.HasField('constraint'):
self.constraints[name] = CppConstraint(specs.constraint)
+
if specs.lr_mult != 1:
self.learning_rate_multiplier[name] = specs.lr_mult
- if specs.decay_mult != 1:
- self.decay_multiplier[name] = specs.decay_mult
def apply_regularizer_constraint(self, value, grad, name=None, step=None):
"""Apply regularization and constraint if available.
@@ -129,12 +132,12 @@ class Optimizer(object):
the updated gradient Tensor
"""
if name is not None and name in self.constraints:
- self.constraints[name].apply(value, grad, step)
+ self.constraints[name].apply(step, value, grad)
elif self.constraint is not None:
self.constraint.apply(step, value, grad)
if name is not None and name in self.regularizers:
- self.regularizers[name].apply(value, grad, step)
+ self.regularizers[name].apply(step, value, grad)
elif self.regularizer is not None:
self.regularizer.apply(step, value, grad)
return grad
@@ -175,24 +178,29 @@ class Optimizer(object):
assert self.lr_gen is not None, 'Learning rate generator is not set.'\
'Either set the lr_gen in constructor or call apply_with_lr'
lr = self.lr_gen(step)
+ if name is not None and name in self.learning_rate_multiplier:
+ lr = lr * self.learning_rate_multiplier[name]
return self.apply_with_lr(step, lr, grad, value, name)
class SGD(Optimizer):
- def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
+ def __init__(self, lr=None, momentum=None, decay=None):
"""The vallina Stochasitc Gradient Descent algorithm.
See the base Optimizer for all arguments.
"""
super(SGD, self).__init__(lr, momentum, decay)
conf = model_pb2.OptimizerConf()
- conf.momentum = momentum
+ if momentum is not None:
+ conf.momentum = momentum
self.opt = singa.CreateOptimizer('SGD')
self.opt.Setup(conf.SerializeToString())
def apply_with_lr(self, step, lr, grad, value, name):
- self.apply_regularizer_constraint(step, value, grad, name)
+ self.apply_regularizer_constraint(value, grad, name, step)
+ if name is not None and name in self.learning_rate_multiplier:
+ lr = lr * self.learning_rate_multiplier[name]
self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
return value
@@ -206,6 +214,8 @@ class Nesterov(Optimizer):
"""
super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
conf = model_pb2.OptimizerConf()
+ if momentum is not None:
+ conf.momentum = momentum
self.opt = singa.CreateOptimizer('Nesterov')
self.opt.Setup(conf.SerializeToString())
@@ -232,6 +242,8 @@ class AdaGrad(Optimizer):
def apply_with_lr(self, step, lr, grad, value, name):
grad = self.apply_regularizer_constraint(step, value, grad, name)
+ if name is not None and name in self.learning_rate_multiplier:
+ lr = lr * self.learning_rate_multiplier[name]
self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
return value
@@ -255,6 +267,8 @@ class RMSProp(Optimizer):
def apply_with_lr(self, step, lr, grad, value, name):
grad = self.apply_regularizer_constraint(step, value, grad, name)
+ if name is not None and name in self.learning_rate_multiplier:
+ lr = lr * self.learning_rate_multiplier[name]
self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
return value
@@ -300,7 +314,9 @@ class L2Regularizer(Regularizer):
if coefficient is None:
assert self.coefficient is not None, 'Must set the coefficient'
coefficient = self.coefficient
- tensor.axpy(coefficient, value, grad)
+ # print coefficient, value.l1(), grad.l1()
+ if coefficient != 0:
+ tensor.axpy(coefficient, value, grad)
return grad
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index ed651e9..1d04cdf 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -177,28 +177,28 @@ class Tensor(object):
if isinstance(x, Tensor):
self.singa_tensor += x.singa_tensor
else:
- self.singa_tensor += x
+ self.singa_tensor += float(x)
return self
def __isub__(self, x):
if isinstance(x, Tensor):
self.singa_tensor -= x.singa_tensor
else:
- self.singa_tensor -= x
+ self.singa_tensor -= float(x)
return self
def __imul__(self, x):
if isinstance(x, Tensor):
self.singa_tensor *= x.singa_tensor
else:
- self.singa_tensor *= x
+ self.singa_tensor *= float(x)
return self
def __idiv__(self, x):
if isinstance(x, Tensor):
self.singa_tensor /= x.singa_tensor
else:
- self.singa_tensor /= x
+ self.singa_tensor /= float(x)
return self
'''