Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/15 16:15:25 UTC

[12/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

SINGA-237 New documentation files for SINGA v1.0

Update the layer identifier to be consistent with the documentation.
In particular, the identifier prefix should be one of
['cudnn', 'singacpp', 'singacuda', 'singacl']. The complete identifier
is <prefix>_xxxx, e.g., cudnn_convolution and
singacpp_convolution. The identifier should not contain upper-case letters.
If the implementation is transparent to cpp/cuda/opencl, then register all
possible identifiers. For instance, Dropout is registered three times:
RegisterLayerClass("singacpp_dropout", Dropout)
RegisterLayerClass("singacl_dropout", Dropout)
RegisterLayerClass("singacuda_dropout", Dropout)


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9c71bd67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9c71bd67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9c71bd67

Branch: refs/heads/dev
Commit: 9c71bd6745450019f9d4cbb748949142cf687616
Parents: cdd718e
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Aug 14 23:41:03 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 19:55:17 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py       |  2 +-
 examples/cifar10/alexnet.cc      |  2 +-
 examples/cifar10/alexnet.py      |  2 +-
 examples/cifar10/vgg-parallel.cc |  1 +
 examples/cifar10/vgg.py          |  4 ++--
 examples/imagenet/alexnet.cc     |  1 +
 examples/mnist/train.py          |  8 ++++----
 include/singa/core/device.h      | 15 +++++++-------
 include/singa/model/layer.h      | 13 +++++++++++-
 src/core/device/cpp_cpu.cc       |  2 +-
 src/model/layer/activation.cc    | 10 +++++++++
 src/model/layer/batchnorm.cc     |  3 +++
 src/model/layer/convolution.cc   |  1 +
 src/model/layer/dense.cc         |  3 +++
 src/model/layer/dropout.cc       |  3 +++
 src/model/layer/flatten.cc       |  3 +++
 src/model/layer/lrn.cc           |  3 +++
 src/model/layer/pooling.cc       |  1 +
 src/model/layer/prelu.cc         |  3 +++
 src/model/layer/rnn.cc           |  3 +++
 src/model/layer/softmax.cc       |  3 +++
 src/python/singa/device.py       |  4 ++--
 src/python/singa/layer.py        | 28 ++++++++++++++++----------
 src/python/singa/optimizer.py    | 38 ++++++++++++++++++-----------------
 src/python/singa/tensor.py       | 13 ++++++------
 src/python/swig/core_device.i    |  4 ++--
 test/python/test_layer.py        | 11 +++++-----
 test/singa/test_cpp_cpu.cc       |  2 +-
 test/singa/test_layer.cc         |  2 +-
 29 files changed, 123 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 1273a57..83771c2 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -128,7 +128,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
     print 'dense b ', dense_b.shape
     initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
-    dense_b.set_value(0.0)
+    dense_b.set_value(0)
     print 'dense b l1 = %f' % (dense_b.l1())
 
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index e1363e4..fa953f8 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -27,7 +27,7 @@
 #include "singa/utils/channel.h"
 #include "singa/utils/string.h"
 namespace singa {
-
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 34da95d..17b6a89 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -29,7 +29,7 @@ from singa import net as ffnet
 
 def create_net(use_cpu=False):
     if use_cpu:
-        layer.engine = 'singa'
+        layer.engine = 'singacpp'
 
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index 149cb21..90e9fce 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -34,6 +34,7 @@
 
 namespace singa {
 
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 const float default_wd  = 0.0005f;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index e8e3602..89c6fe8 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -38,7 +38,7 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
 
 def create_net(use_cpu=False):
     if use_cpu:
-        layer.engine = 'singa'
+        layer.engine = 'singacpp'
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
     net.add(layer.Dropout('drop1', 0.3))
@@ -84,7 +84,7 @@ def create_net(use_cpu=False):
             initializer.uniform(p, 0, 1)
         elif len(p.shape) > 1:
             if 'conv' in name:
-                p.gaussian(0, 0, 3 * 3 * p.shape[0])
+                initializer.gaussian(p, 0, 3 * 3 * p.shape[0])
             else:
                 p.gaussian(0, 0.02)
         else:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/imagenet/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet.cc b/examples/imagenet/alexnet.cc
index 26b2d96..4ac1130 100644
--- a/examples/imagenet/alexnet.cc
+++ b/examples/imagenet/alexnet.cc
@@ -33,6 +33,7 @@
 #include "singa/utils/timer.h"
 namespace singa {
 
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std, float bias = .0f) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 43b8e26..55c7cbb 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -85,7 +85,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             tposhidsample = tensor.gt(tposhidprob, tposhidrandom)
 
             # negative phase
-            tnegdata = tensor.mult(tposhidsample, tweight.transpose())
+            tnegdata = tensor.mult(tposhidsample, tweight.T())
             tnegdata.add_row(tvbias)
             tnegdata = tensor.sigmoid(tnegdata)
 
@@ -95,8 +95,8 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             error = tensor.sum(tensor.square((tdata - tnegdata)))
             trainerrorsum = error + trainerrorsum
 
-            tgweight = tensor.mult(tnegdata.transpose(), tneghidprob) -\
-                    tensor.mult(tdata.transpose(), tposhidprob)
+            tgweight = tensor.mult(tnegdata.T(), tneghidprob) -\
+                    tensor.mult(tdata.T(), tposhidprob)
             tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
             tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
 
@@ -115,7 +115,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
         initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
         tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
 
-        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.transpose())
+        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
         tvalidnegdata.add_row(tvbias)
         tvalidnegdata = tensor.sigmoid(tvalidnegdata)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index a564524..810d41f 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -152,6 +152,7 @@ class CppCPU : public Device {
 
   std::shared_ptr<Device> host() const override { return defaultDevice;}
   void SetRandSeed(unsigned seed) override;
+
  protected:
   void DoExec(function<void(Context*)>&& fn, int executor) override;
 
@@ -303,10 +304,15 @@ private:
 /// If CUDA or OPENCL are not enabled, then the respective related methods should
 /// return something that indicates their absence (for example, 0 devices);
 /// however they should always be available regardless of compile-time switches.
-#ifdef USE_CUDA
 class Platform {
 public:
 
+  /// Return the default host device
+  static std::shared_ptr<Device> GetDefaultDevice() {
+    return defaultDevice;
+  }
+
+#ifdef USE_CUDA
   /// Return the number of total available GPUs
   static int GetNumGPUs();
 
@@ -322,11 +328,6 @@ public:
   /// Return a string containing all hardware info, e.g., version, memory size.
   static const std::string DeviceQuery(int id, bool verbose = false);
 
-  /// Return the default host device
-  static std::shared_ptr<Device> GetDefaultDevice() {
-    return defaultDevice;
-  }
-
   /// Create a set of CudaGPU Device using 'num_devices' free GPUs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
@@ -334,6 +335,7 @@ public:
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+#endif // USE_CUDA
 
   /// Create a \p num_devices set of valid OpenCL devices, regardless of
   /// platforms.  If there are fewer valid devices than requested, then this
@@ -373,7 +375,6 @@ private:
 #endif  // USE_OPENCL
 };
 
-#endif // USE_CUDA
 
 }  // namespace singa
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index 58f0f4b..e67fcc5 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -222,6 +222,17 @@ class Layer {
   vector<ParamSpec> param_specs_;
 };
 
+/// Name should be formatted as cudnn_xxx, singacpp_xxx, singacuda_xxx,
+/// singacl_xxx, where xxx is the real layer type, e.g., convolution, relu, etc.
+/// xxx should only have lower-case letters.
+/// If the implementation is transparent to cpp/cuda/opencl, then register all
+/// possible identifiers. For instance, Dropout is registered three times:
+/// RegisterLayerClass("singacpp_dropout", Dropout)
+/// RegisterLayerClass("singacl_dropout", Dropout)
+/// RegisterLayerClass("singacuda_dropout", Dropout)
+/// To be compatible with previous commits, the following identifier is also
+/// registered; better avoid using it, as it will be deprecated:
+/// RegisterLayerClass("singa_dropout", Dropout)
 #define RegisterLayerClass(Name, SubLayer) \
   static Registra<Layer, SubLayer> Name##SubLayer(#Name);
 
@@ -234,7 +245,7 @@ inline const std::vector<std::string> GetRegisteredLayers() {
   vector<std::string> ret;
   for (const string type : Factory<Layer>::GetIDs()) {
     auto layer = CreateLayer(type);
-    ret.push_back("Register type: " + type + " --> " + layer->layer_type());
+    ret.push_back("Register type: " + type);
   }
   return ret;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/core/device/cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/src/core/device/cpp_cpu.cc b/src/core/device/cpp_cpu.cc
index 2b3e63b..04209ab 100644
--- a/src/core/device/cpp_cpu.cc
+++ b/src/core/device/cpp_cpu.cc
@@ -22,7 +22,7 @@ namespace singa {
 
 std::shared_ptr<Device> defaultDevice=std::make_shared<CppCPU>();
 
-CppCPU::CppCPU() : Device(0, 1) {
+CppCPU::CppCPU() : Device(-1, 1) {
   lang_ = kCpp;
   //host_ = nullptr;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.cc b/src/model/layer/activation.cc
index aa40edb..eb90d87 100644
--- a/src/model/layer/activation.cc
+++ b/src/model/layer/activation.cc
@@ -25,6 +25,16 @@ RegisterLayerClass(singa_relu, Activation);
 RegisterLayerClass(singa_sigmoid, Activation);
 RegisterLayerClass(singa_tanh, Activation);
 
+RegisterLayerClass(singacpp_relu, Activation);
+RegisterLayerClass(singacuda_relu, Activation);
+RegisterLayerClass(singacl_relu, Activation);
+RegisterLayerClass(singacpp_sigmoid, Activation);
+RegisterLayerClass(singacuda_sigmoid, Activation);
+RegisterLayerClass(singacl_sigmoid, Activation);
+RegisterLayerClass(singacpp_tanh, Activation);
+RegisterLayerClass(singacuda_tanh, Activation);
+RegisterLayerClass(singacl_tanh, Activation);
+
 void Activation::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   auto pos = conf.type().find_first_of('_');

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index f348661..b345c6b 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -22,6 +22,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_batchnorm, BatchNorm);
+RegisterLayerClass(singacpp_batchnorm, BatchNorm);
+RegisterLayerClass(singacuda_batchnorm, BatchNorm);
+RegisterLayerClass(singacl_batchnorm, BatchNorm);
 void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 4fc209f..0d1751d 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -24,6 +24,7 @@ namespace singa {
 using std::vector;
 
 RegisterLayerClass(singa_convolution, Convolution);
+RegisterLayerClass(singacpp_convolution, Convolution);
 void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   ConvolutionConf conv_conf = conf.convolution_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 1a2d16e..7470154 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -24,6 +24,9 @@ namespace singa {
 using std::vector;
 
 RegisterLayerClass(singa_dense, Dense);
+RegisterLayerClass(singacpp_dense, Dense);
+RegisterLayerClass(singacuda_dense, Dense);
+RegisterLayerClass(singacl_dense, Dense);
 Dense::~Dense() {
   // delete weight_;
   // delete bias_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.cc b/src/model/layer/dropout.cc
index 35801b4..d7397a1 100644
--- a/src/model/layer/dropout.cc
+++ b/src/model/layer/dropout.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_dropout, Dropout);
+RegisterLayerClass(singacpp_dropout, Dropout);
+RegisterLayerClass(singacuda_dropout, Dropout);
+RegisterLayerClass(singacl_dropout, Dropout);
 void Dropout::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   dropout_ratio_ = conf.dropout_conf().dropout_ratio();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/flatten.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/flatten.cc b/src/model/layer/flatten.cc
index d89361e..561c310 100644
--- a/src/model/layer/flatten.cc
+++ b/src/model/layer/flatten.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_flatten, Flatten);
+RegisterLayerClass(singacpp_flatten, Flatten);
+RegisterLayerClass(singacuda_flatten, Flatten);
+RegisterLayerClass(singacl_flatten, Flatten);
 void Flatten::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   axis_ = conf.flatten_conf().axis();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/lrn.cc b/src/model/layer/lrn.cc
index 6b5a618..4fdb5c9 100644
--- a/src/model/layer/lrn.cc
+++ b/src/model/layer/lrn.cc
@@ -23,6 +23,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_lrn, LRN);
+RegisterLayerClass(singacpp_lrn, LRN);
+RegisterLayerClass(singacuda_lrn, LRN);
+RegisterLayerClass(singacl_lrn, LRN);
 void LRN::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index 5e7ba1d..23969da 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -21,6 +21,7 @@
 namespace singa {
 
 RegisterLayerClass(singa_pooling, Pooling);
+RegisterLayerClass(singacpp_pooling, Pooling);
 void Pooling::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   PoolingConf pool_conf = conf.pooling_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/prelu.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/prelu.cc b/src/model/layer/prelu.cc
index a20972c..e567172 100644
--- a/src/model/layer/prelu.cc
+++ b/src/model/layer/prelu.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_prelu, PReLU);
+RegisterLayerClass(singacpp_prelu, PReLU);
+RegisterLayerClass(singacuda_prelu, PReLU);
+RegisterLayerClass(singacl_prelu, PReLU);
 void PReLU::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/rnn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/rnn.cc b/src/model/layer/rnn.cc
index 524b462..b811f9d 100644
--- a/src/model/layer/rnn.cc
+++ b/src/model/layer/rnn.cc
@@ -23,6 +23,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_rnn, RNN);
+RegisterLayerClass(singacpp_rnn, RNN);
+RegisterLayerClass(singacuda_rnn, RNN);
+RegisterLayerClass(singacl_rnn, RNN);
 void RNN::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 6a49131..2cbd264 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -20,6 +20,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_softmax, Softmax);
+RegisterLayerClass(singacpp_softmax, Softmax);
+RegisterLayerClass(singacuda_softmax, Softmax);
+RegisterLayerClass(singacl_softmax, Softmax);
 void Softmax::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   CHECK_EQ(in_sample.size(), 1u);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index eff6783..897fdf5 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -113,7 +113,7 @@ def create_cuda_gpu_on(device_id):
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
-
+default_device = singa.Platform.GetDefaultDevice()
 def get_default_device():
     '''Get the default host device which is a CppCPU device'''
-    return singa.Platform.GetDefaultDevice()
+    return default_device
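
With this change, get_default_device() caches the default host device at
module load, so repeated calls return the same Python object. A hypothetical
usage sketch (tensor construction follows the pattern used elsewhere in this
patch):

    from singa import device, tensor

    host = device.get_default_device()  # cached CppCPU; id() is now -1
    t = tensor.Tensor((2, 3), host)     # a 2x3 float tensor on the host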

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index 0759716..b0fdb5e 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -152,9 +152,9 @@ class Layer(object):
             for t in x:
                 x.append(t.singa_tensor)
         else:
-            assert isinstance(input, tensor.Tensor), \
+            assert isinstance(x, tensor.Tensor), \
                 'input must be a Tensor or a list of Tensor'
-            xs = x
+            xs = x.singa_tensor
         y = self.layer.Forward(flag, xs)
         if type(y) == list:
             return tensor.from_raw_tensors(y)
@@ -266,7 +266,7 @@ class Conv2D(Layer):
         self.conf.param.extend([bspecs])
         self.param_specs.append(bspecs)
 
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp'])
         self.layer = _create_layer(engine, 'Convolution')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -322,7 +322,7 @@ class Pooling2D(Layer):
         conf = self.conf.pooling_conf
         conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad)
         conf.pool = mode
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp'])
         self.layer = _create_layer(engine, 'Pooling')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -439,7 +439,7 @@ class BatchNormalization(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
         self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
         self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'BatchNorm')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -466,7 +466,7 @@ class LRN(Layer):
         # TODO(wangwei) enable mode = 'within_channel'
         assert mode == 'cross_channel', 'only support mode="across_channel"'
         conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'LRN')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -513,7 +513,10 @@ class Dense(Layer):
         self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
         self.param_specs.append(_construct_param_specs_from_dict(b_specs))
         # dense layer is transparent to engine.
-        self.layer = _create_layer('singa', 'Dense')
+        if engine == 'cudnn':
+            self.layer = _create_layer('singacuda', 'Dense')
+        else:
+            self.layer = _create_layer(engine, 'Dense')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
@@ -533,7 +536,7 @@ class Dropout(Layer):
         # 'cudnn' works for v>=5.0
         #  if engine.lower() == 'cudnn':
         #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -549,8 +552,8 @@ class Activation(Layer):
     """
     def __init__(self, name, mode='relu', input_sample_shape=None):
         super(Activation, self).__init__(name)
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.conf.type = (engine + '_' + mode).lower()
-        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, mode)
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -568,7 +571,7 @@ class Softmax(Layer):
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacl', 'singacuda'])
         self.layer = _create_layer(engine, 'Softmax')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -587,7 +590,10 @@ class Flatten(Layer):
         conf = self.conf.flatten_conf
         conf.axis = axis
         # flatten layer is transparent to engine
-        self.layer = _create_layer('singa', 'Flatten')
+        if engine == 'cudnn':
+            self.layer = _create_layer('singacuda', 'Flatten')
+        else:
+            self.layer = _create_layer(engine, 'Flatten')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 338c6b0..86e68af 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -54,7 +54,7 @@ class Optimizer(object):
         lr (float): a constant for the learning rate, mutually exclusive with
             'lr_gen'.
         momentum (float): a constant for the momentum value
-        decay (float): the coefficent for L2 regularizer, which is mutually
+        weight_decay (float): the coefficient for the L2 regularizer, which is mutually
             exclusive with 'regularizer'.
         lr_gen (function): a function returns the learning rate given
             the current training step/epoch. It is mutually exclusive with lr.
@@ -67,7 +67,7 @@ class Optimizer(object):
             constraint would be applied inside apply_with_lr(). Users can
             also do regularization outside.
     '''
-    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
@@ -76,10 +76,10 @@ class Optimizer(object):
                 return lr
         self.lr_gen = lr_gen
         self.momentum = momentum
-        if decay is not None:
+        if weight_decay is not None:
             assert regularizer is None, \
-                'Cannot set decay and regularizer at the same time'
-            regularizer = L2Regularizer(decay)
+                'Cannot set weight_decay and regularizer at the same time'
+            regularizer = L2Regularizer(weight_decay)
         if regularizer is not None:
             if isinstance(regularizer, model_pb2.RegularizerConf):
                 self.regularizer = CppRegularizer(regularizer)
@@ -121,7 +121,7 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+    def apply_regularizer_constraint(self, epoch, value, grad, name=None):
         '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
@@ -137,12 +137,12 @@ class Optimizer(object):
             the updated gradient Tensor
         '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, epoch)
+            self.constraints[name].apply(epoch, value, grad)
         elif self.constraint is not None:
             self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, epoch)
+            self.regularizers[name].apply(epoch, value, grad)
         elif self.regularizer is not None:
             self.regularizer.apply(epoch, value, grad)
         return grad
@@ -193,12 +193,13 @@ class SGD(Optimizer):
     See the base Optimizer for all arguments.
     '''
 
-    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
                                   constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = self.momentum
+        if self.momentum is not None:
+            conf.momentum = self.momentum
         conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
@@ -215,12 +216,13 @@ class Nesterov(Optimizer):
     See the base Optimizer for all arguments.
     '''
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=0.9, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
                                        constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        if self.momentum is not None:
+            conf.momentum = momentum
         conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
@@ -239,9 +241,9 @@ class AdaGrad(Optimizer):
     Args:
         epsilon (float): small number for preventing numeric error.
     '''
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+    def __init__(self, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+        super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                       constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
@@ -265,9 +267,9 @@ class RMSProp(Optimizer):
         epsilon (float): small value for preventing numeric error
     '''
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+        super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                       constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho
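
With the rename from decay to weight_decay, callers pass the L2 coefficient
by the new keyword. A hypothetical usage sketch (values are arbitrary):

    from singa import optimizer

    sgd = optimizer.SGD(lr=0.01, momentum=0.9, weight_decay=1e-4)
    # roughly equivalent to passing regularizer=L2Regularizer(1e-4),
    # since the constructor wraps weight_decay in an L2Regularizer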

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index e2572d3..5086fdc 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -56,7 +56,7 @@ import numpy as np
 from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-import device
+import device as pydevice
 
 
 class Tensor(object):
@@ -81,7 +81,8 @@ class Tensor(object):
             assert isinstance(shape, tuple), 'shape should be tuple'
             vs = list(shape)
             if device is None:
-                self.singa_tensor = singa.Tensor(vs, dtype)
+                device = pydevice.get_default_device()
+                self.singa_tensor = singa.Tensor(vs, device, dtype)
             else:
                 self.singa_tensor = singa.Tensor(vs, device, dtype)
             self.shape = shape
@@ -225,12 +226,10 @@ class Tensor(object):
         '''
         return _call_singa_func(self.singa_tensor.T)
 
-    '''
     def copy(self):
-        shallow copy
-            call copy constructor of singa::Tensor
+        '''shallow copy calls copy constructor of singa::Tensor
+        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
-    '''
 
     def deepcopy(self):
         '''Same as clone().
@@ -513,7 +512,7 @@ def to_numpy(t):
     Returns:
         a numpy array
     '''
-    assert t.device == device.get_default_device() or t.device is None, \
+    assert (t.device.id() == -1) or (t.device is None), \
         'Please move the tensor onto the default host device'
 
     if t.dtype == core_pb2.kFloat32:
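
Because to_numpy() now checks t.device.id() == -1, a tensor on a GPU must be
copied back to the host before conversion. A hypothetical sketch (the GPU
device creation assumes a CUDA build):

    from singa import device, tensor

    gpu = device.create_cuda_gpu()   # requires USE_CUDA
    t = tensor.Tensor((2, 3), gpu)
    t.set_value(1.0)
    t.to_host()                      # move onto the default host device
    a = tensor.to_numpy(t)           # passes the id() == -1 check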

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/swig/core_device.i
----------------------------------------------------------------------
diff --git a/src/python/swig/core_device.i b/src/python/swig/core_device.i
index 21b97b4..b3521be 100644
--- a/src/python/swig/core_device.i
+++ b/src/python/swig/core_device.i
@@ -49,9 +49,9 @@ class Device {
   int id() const;
 };
 
-#if USE_CUDA
 class Platform {
  public:
+#if USE_CUDA
   static int GetNumGPUs();
   static const std::vector<int> GetGPUIDs();
   static const std::pair<size_t, size_t> GetGPUMemSize(const int device);
@@ -61,9 +61,9 @@ class Platform {
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+#endif // USE_CUDA
   static std::shared_ptr<Device> GetDefaultDevice();
 };
 
-#endif // USE_CUDA
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index 7078240..441f352 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -25,6 +25,7 @@ class TestPythonLayer(unittest.TestCase):
                          )
 
     def setUp(self):
+        layer.engine='singacpp'
         self.w = {'init': 'Xavier', 'regularizer': 1e-4}
         self.b = {'init': 'Constant', 'value': 0}
         self.sample_shape = None
@@ -40,8 +41,8 @@ class TestPythonLayer(unittest.TestCase):
         in_sample_shape = (1, 3, 3)
         conv = layer.Conv2D('conv', 1, 3, 2, W_specs=self.w, b_specs=self.b,
                             pad=1, input_sample_shape=in_sample_shape)
-        cuda = device.create_cuda_gpu()
-        conv.to_device(cuda)
+        # cuda = device.create_cuda_gpu()
+        # conv.to_device(cuda)
         params = conv.param_values()
 
         raw_x = np.arange(9, dtype=np.float32) + 1
@@ -51,9 +52,9 @@ class TestPythonLayer(unittest.TestCase):
         params[0].copy_from_numpy(w)
         params[1].set_value(1.0)
 
-        x.to_device(cuda)
+        # x.to_device(cuda)
         y = conv.forward(model_pb2.kTrain, x)
-        y.to_host()
+        # y.to_host()
         npy = tensor.to_numpy(y).flatten()
 
         self.assertAlmostEqual(3.0, npy[0])
@@ -63,7 +64,7 @@ class TestPythonLayer(unittest.TestCase):
 
         dy = np.asarray([0.1, 0.2, 0.3, 0.4], dtype=np.float32).reshape(y.shape)
         grad = tensor.from_numpy(dy)
-        grad.to_device(cuda)
+        # grad.to_device(cuda)
         (dx, [dw, db]) = conv.backward(model_pb2.kTrain, grad)
         dx.to_host()
         dw.to_host()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/singa/test_cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cpp_cpu.cc b/test/singa/test_cpp_cpu.cc
index 35bd108..5f3308a 100644
--- a/test/singa/test_cpp_cpu.cc
+++ b/test/singa/test_cpp_cpu.cc
@@ -27,7 +27,7 @@ using singa::CppCPU;
 using singa::Block;
 TEST(CppCPU, Constructor) {
   CppCPU dev;
-  EXPECT_EQ(0, dev.id());
+  EXPECT_EQ(-1, dev.id());
 }
 
 TEST(CppCPU, MemoryMallocFree) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/singa/test_layer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_layer.cc b/test/singa/test_layer.cc
index aa01746..bb33dba 100644
--- a/test/singa/test_layer.cc
+++ b/test/singa/test_layer.cc
@@ -7,7 +7,7 @@ TEST(Layer, CreateLayer) {
       "convolution", "dense", "dropout", "relu", "batchnorm",
       "flatten",     "lrn",   "pooling", "prelu",      "softmax"};
   for (auto type : types) {
-    auto layer = singa::CreateLayer("singa_" + type);
+    auto layer = singa::CreateLayer("singacpp_" + type);
     // EXPECT_EQ(layer->layer_type(), type);
   }
 }