Posted to commits@singa.apache.org by wa...@apache.org on 2015/08/12 11:00:33 UTC
[1/2] incubator-singa git commit: SINGA-46 Fix a bug in updater.cc to scale the gradients
Repository: incubator-singa
Updated Branches:
refs/heads/master a8c8211f4 -> 538736c4a
SINGA-46 Fix a bug in updater.cc to scale the gradients
Scale gradients in Updater::Update() before updating parameters.
Format code in updater.h and updater.cc.
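For context, the fix is about ordering: the raw gradient must be multiplied by grad_scale before the weight-decay term and the momentum/SGD step are applied; otherwise the L2 term and the history would be computed from an unscaled gradient. The following is a minimal, self-contained sketch of that ordering in plain C++ (std::vector in place of the project's mshadow tensors; the function name and signature are illustrative only, not SINGA's API):

#include <cstddef>
#include <vector>

// Illustrative standalone SGD-with-momentum step mirroring the fixed ordering:
// 1) scale the raw gradient, 2) add the L2 (weight decay) term, 3) update.
void SgdUpdate(std::vector<float>& data, std::vector<float>& grad,
               std::vector<float>& history, float lr, float momentum,
               float weight_decay, float grad_scale = 1.0f) {
  for (std::size_t i = 0; i < data.size(); ++i) {
    float g = grad[i];
    if (grad_scale != 1.0f)    // the fix: scale before anything else
      g *= grad_scale;
    if (weight_decay > 0.0f)   // L2 term uses the already-scaled gradient
      g += weight_decay * data[i];
    if (momentum > 0.0f) {
      history[i] = history[i] * momentum - lr * g;
      data[i] += history[i];
    } else {
      data[i] -= lr * g;
    }
  }
}

A typical caller would pass something like grad_scale = 1.0f / n after summing gradients from n workers, though the commit itself does not spell out the call site; that usage is an assumption.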
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6b34ff4e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6b34ff4e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6b34ff4e
Branch: refs/heads/master
Commit: 6b34ff4e539ade046d916fa2af52af425a304f2d
Parents: d5b6a30
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Aug 12 16:30:00 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 12 16:32:37 2015 +0800
----------------------------------------------------------------------
include/utils/updater.h | 41 ++++++++------
src/utils/updater.cc | 130 ++++++++++++++++++++++---------------------
2 files changed, 89 insertions(+), 82 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b34ff4e/include/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/utils/updater.h b/include/utils/updater.h
index d2f4dc1..99629cf 100644
--- a/include/utils/updater.h
+++ b/include/utils/updater.h
@@ -1,55 +1,61 @@
-#ifndef INCLUDE_UTILS_UPDATER_H_
-#define INCLUDE_UTILS_UPDATER_H_
+#ifndef SINGA_UTILS_UPDATER_H_
+#define SINGA_UTILS_UPDATER_H_
+
#include "proto/job.pb.h"
#include "utils/param.h"
-namespace singa{
+namespace singa {
/**
* Updater for Param.
*/
class Updater{
public:
virtual ~Updater() {}
- virtual void Init(const UpdaterProto &proto){
- proto_=proto;
+ virtual void Init(const UpdaterProto &proto) {
+ proto_ = proto;
}
- virtual void Update(int step, Param* param, float grad_scale=1.0f)=0;
+ virtual void Update(int step, Param* param, float grad_scale = 1.0f) = 0;
float GetLearningRate(int step);
+
protected:
UpdaterProto proto_;
};
-class SGDUpdater : public Updater{
+
+class SGDUpdater : public Updater {
public:
virtual void Init(const UpdaterProto& proto);
- virtual void Update(int step, Param* param, float grad_scale=1.0f);
+ virtual void Update(int step, Param* param, float grad_scale = 1.0f);
protected:
float base_lr_;
float momentum_;
float weight_decay_;
};
-class NesterovUpdater : public Updater{
+
+class AdaGradUpdater : public Updater{
public:
virtual void Init(const UpdaterProto& proto);
- virtual void Update(int step, Param* param, float grad_scale=1.0f);
+ virtual void Update(int step, Param* param, float grad_scale = 1.0f);
protected:
float base_lr_;
- float momentum_;
+ float delta_;
float weight_decay_;
};
-class AdaGradUpdater : public Updater{
+
+
+class NesterovUpdater : public Updater {
public:
virtual void Init(const UpdaterProto& proto);
- virtual void Update(int step, Param* param, float grad_scale=1.0f);
+ virtual void Update(int step, Param* param, float grad_scale = 1.0f);
protected:
float base_lr_;
- float delta_;
+ float momentum_;
float weight_decay_;
};
-
+/*
class RMSPropUpdater : public Updater{
public:
virtual void Init(const UpdaterProto& proto);
@@ -62,7 +68,6 @@ class RMSPropUpdater : public Updater{
float weight_decay_;
};
-/*
class AdaDeltaUpdater : public Updater{
public:
virtual void Init(const UpdaterProto& proto);
@@ -74,6 +79,6 @@ class AdaDeltaUpdater : public Updater{
float weight_decay_;
};
*/
-}
+} // namespace singa
-#endif // INCLUDE_UTILS_UPDATER_H_
+#endif // SINGA_UTILS_UPDATER_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b34ff4e/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index 18e53ce..b85982e 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -3,27 +3,28 @@
#include "mshadow/tensor.h"
#include "mshadow/cxxnet_op.h"
#include "proto/job.pb.h"
+namespace singa {
+
using namespace mshadow;
using namespace mshadow::expr;
-namespace singa {
-float Updater::GetLearningRate(int step){
- float ret = 0., r = 0., base=proto_.base_lr();
- int freq=0;
+float Updater::GetLearningRate(int step) {
+ float ret = 0., r = 0., base = proto_.base_lr();
+ int freq = 0;
switch (proto_.lr_change()) {
case UpdaterProto_ChangeMethod_kFixed:
ret = base;
break;
case UpdaterProto_ChangeMethod_kLinear:
// a is init, b is the final
- freq=proto_.linear_conf().change_freq();
+ freq = proto_.linear_conf().change_freq();
r = step * 1.0 / freq;
ret = (1.0 - r) * base + r * proto_.linear_conf().final_lr();
break;
case UpdaterProto_ChangeMethod_kExponential:
// a is init, b is the final, from convnet
- freq=proto_.exponential_conf().change_freq();
+ freq = proto_.exponential_conf().change_freq();
ret = base / pow(2, step * 1. / freq);
break;
case UpdaterProto_ChangeMethod_kInverseT:
@@ -34,19 +35,19 @@ float Updater::GetLearningRate(int step){
break;
case UpdaterProto_ChangeMethod_kInverse:
// a is init, b is gamma, c is pow
- ret=base*pow(1.f+proto_.inverse_conf().gamma()*step,
- -proto_.inverse_conf().pow());
+ ret = base * pow(1.f + proto_.inverse_conf().gamma() * step,
+ - proto_.inverse_conf().pow());
break;
case UpdaterProto_ChangeMethod_kStep:
// a is the base learning rate, b is gamma, from caffe
// notice it is step/change_steps, not step*1.0/change_steps
- freq=proto_.step_conf().change_freq();
+ freq = proto_.step_conf().change_freq();
ret = base * pow(proto_.step_conf().gamma(), step / freq);
break;
case UpdaterProto_ChangeMethod_kFixedStep:
- for(int i=0;i<proto_.fixedstep_conf().step_size();i++){
- if(step>proto_.fixedstep_conf().step(i))
- ret=proto_.fixedstep_conf().step_lr(i);
+ for (int i = 0; i < proto_.fixedstep_conf().step_size(); i++) {
+ if (step > proto_.fixedstep_conf().step(i))
+ ret = proto_.fixedstep_conf().step_lr(i);
}
break;
default:
@@ -56,91 +57,93 @@ float Updater::GetLearningRate(int step){
}
/***********************SGD with momentum******************************/
-void SGDUpdater::Init(const UpdaterProto& proto){
+void SGDUpdater::Init(const UpdaterProto& proto) {
Updater::Init(proto);
- base_lr_=proto.base_lr();
- //CHECK_GT(base_lr_, 0);
- momentum_=proto.momentum();
- weight_decay_=proto.weight_decay();
+ base_lr_ = proto.base_lr();
+ momentum_ = proto.momentum();
+ weight_decay_ = proto.weight_decay();
}
-void SGDUpdater::Update(int step, Param* param, float grad_scale){
- Shape<1> s=Shape1(param->size());
+void SGDUpdater::Update(int step, Param* param, float grad_scale) {
+ Shape<1> s = Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
- float lr=GetLearningRate(step)*param->learning_rate_multiplier();
- float wd=weight_decay_*param->weight_decay_multiplier();
- if(wd>0){ // L2 regularization
- grad+=data*wd;
+ float lr = GetLearningRate(step)*param->learning_rate_multiplier();
+ float wd = weight_decay_*param->weight_decay_multiplier();
+ if (grad_scale != 1.f)
+ grad *= grad_scale;
+ if (wd > 0) { // L2 regularization, should be done after timing grad_scale
+ grad += data * wd;
}
- if(momentum_>0){
+ if (momentum_ > 0) {
Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
- if(step==0) history=0;
- history=history*momentum_-lr*grad;
- data+=history;
- }else{
- grad*=-lr;
- data+=grad;
+ history = history * momentum_ - lr * grad;
+ data += history;
+ } else {
+ grad *= -lr;
+ data += grad;
}
}
/***********************Nesterov******************************/
-void NesterovUpdater::Init(const UpdaterProto& proto){
+void NesterovUpdater::Init(const UpdaterProto& proto) {
Updater::Init(proto);
- base_lr_=proto.base_lr();
+ base_lr_ = proto.base_lr();
CHECK_GT(base_lr_, 0);
- weight_decay_=proto.weight_decay();
+ weight_decay_ = proto.weight_decay();
}
-void NesterovUpdater::Update(int step, Param* param, float grad_scale){
- Shape<1> s=Shape1(param->size());
+void NesterovUpdater::Update(int step, Param* param, float grad_scale) {
+ Shape<1> s = Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
TensorContainer<cpu, 1> tmp(s);
- if(step==0) history=0;
- float lr=GetLearningRate(step)*param->learning_rate_multiplier();
- float wd=weight_decay_*param->weight_decay_multiplier();
- if(wd>0){ // L2 regularization
- grad+=data*wd;
+ float lr = GetLearningRate(step)*param->learning_rate_multiplier();
+ float wd = weight_decay_*param->weight_decay_multiplier();
+ if (grad_scale != 1.f)
+ grad *= grad_scale;
+ if (wd > 0) { // L2 regularization, should be done after timing grad_scale
+ grad += data * wd;
}
Copy(tmp, history);
- history=history*momentum_+lr*grad;
- tmp=history*(1+momentum_)-tmp*momentum_;
- data-=tmp;
+ history = history * momentum_ + lr * grad;
+ tmp = history * (1 + momentum_) - tmp * momentum_;
+ data -= tmp;
}
/***********************AdaGrad******************************/
-void AdaGradUpdater::Init(const UpdaterProto& proto){
+void AdaGradUpdater::Init(const UpdaterProto& proto) {
Updater::Init(proto);
- base_lr_=proto.base_lr();
+ base_lr_ = proto.base_lr();
CHECK_GT(base_lr_, 0);
- delta_=proto.delta();
- weight_decay_=proto.weight_decay();
+ delta_ = proto.delta();
+ weight_decay_ = proto.weight_decay();
}
-void AdaGradUpdater::Update(int step, Param* param, float grad_scale){
- Shape<1> s=Shape1(param->size());
+void AdaGradUpdater::Update(int step, Param* param, float grad_scale) {
+ Shape<1> s = Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
- if(step==0) history=0;
- history+=F<op::square>(grad*grad_scale);
- float lr=GetLearningRate(step)*param->learning_rate_multiplier();
- float wd=weight_decay_*param->weight_decay_multiplier();
- if(wd>0){ // L2 regularization
- grad+=data*wd;
+ float lr = GetLearningRate(step)*param->learning_rate_multiplier();
+ float wd = weight_decay_*param->weight_decay_multiplier();
+ if (grad_scale != 1.f)
+ grad *= grad_scale;
+ if (wd > 0) { // L2 regularization, should be done after timing grad_scale
+ grad += data * wd;
}
- data-=lr*grad/(F<op::sqrtop>(history,delta_));
+ history += F<op::square>(grad);
+ data -= lr * grad / (F<op::sqrtop>(history, delta_));
}
-/***********************RMSProp******************************/
+/***********************RMSProp******************************
void RMSPropUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
- base_lr_=proto.base_lr();
+ base_lr_ = proto.base_lr();
CHECK_GT(base_lr_, 0);
- delta_=proto.delta();
- rho_=proto.rmsprop_conf().rho();
- weight_decay_=proto.weight_decay();
+ delta_ = proto.delta();
+ rho_ = proto.rmsprop_conf().rho();
+ weight_decay_ = proto.weight_decay();
}
void RMSPropUpdater::Update(int step, Param* param, float grad_scale){
@@ -148,7 +151,6 @@ void RMSPropUpdater::Update(int step, Param* param, float grad_scale){
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
- if(step==0) history=0;
history=history*rho_+(1-rho_)*F<op::square>(grad*grad_scale);
float lr=GetLearningRate(step)*param->learning_rate_multiplier();
float wd=weight_decay_*param->weight_decay_multiplier();
@@ -158,7 +160,7 @@ void RMSPropUpdater::Update(int step, Param* param, float grad_scale){
data-=lr*grad/(F<op::sqrtop>(history,delta_));
}
-/***********************AdaDelta******************************
+***********************AdaDelta******************************
void AdaDeltaUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
delta_=proto.delta();
@@ -188,4 +190,4 @@ void AdaDeltaUpdater::Update(int step, Param* param, float grad_scale){
}
*/
-} /* singa */
+} // namespace singa
[2/2] incubator-singa git commit: SINGA-46 Fix a bug in updater.cc to scale the gradients
Posted by wa...@apache.org.
SINGA-46 Fix a bug in updater.cc to scale the gradients
merge to master
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/538736c4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/538736c4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/538736c4
Branch: refs/heads/master
Commit: 538736c4ae6172d2f66fe09d9466ac878e8c8415
Parents: 6b34ff4 a8c8211
Author: wang sheng <wa...@gmail.com>
Authored: Wed Aug 12 16:48:45 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Wed Aug 12 16:48:45 2015 +0800
----------------------------------------------------------------------
src/utils/common.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------